summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rwxr-xr-xconfigure.ac7
-rw-r--r--vtysh/Makefile.am3
-rw-r--r--zebra/Makefile.am7
-rw-r--r--zebra/debug.c37
-rw-r--r--zebra/debug.h5
-rw-r--r--zebra/main.c7
-rw-r--r--zebra/misc_null.c7
-rw-r--r--zebra/rib.h18
-rw-r--r--zebra/zebra_fpm.c1581
-rw-r--r--zebra/zebra_fpm.h34
-rw-r--r--zebra/zebra_fpm_netlink.c552
-rw-r--r--zebra/zebra_fpm_private.h56
-rw-r--r--zebra/zebra_rib.c32
13 files changed, 2344 insertions, 2 deletions
diff --git a/configure.ac b/configure.ac
index 425fe273..9bbe89f3 100755
--- a/configure.ac
+++ b/configure.ac
@@ -272,6 +272,8 @@ AC_ARG_ENABLE(time-check,
[ --disable-time-check disable slow thread warning messages])
AC_ARG_ENABLE(pcreposix,
[ --enable-pcreposix enable using PCRE Posix libs for regex functions])
+AC_ARG_ENABLE(fpm,
+[ --enable-fpm enable Forwarding Plane Manager support])
if test x"${enable_gcc_ultra_verbose}" = x"yes" ; then
CFLAGS="${CFLAGS} -W -Wcast-qual -Wstrict-prototypes"
@@ -292,6 +294,10 @@ if test x"${enable_time_check}" != x"no" ; then
fi
fi
+if test "${enable_fpm}" = "yes"; then
+ AC_DEFINE(HAVE_FPM,,Forwarding Plane Manager support)
+fi
+
if test "${enable_broken_aliases}" = "yes"; then
if test "${enable_netlink}" = "yes"
then
@@ -828,6 +834,7 @@ fi
AC_SUBST(RT_METHOD)
AC_SUBST(KERNEL_METHOD)
AC_SUBST(OTHER_METHOD)
+AM_CONDITIONAL([HAVE_NETLINK], [test "x$netlink" = "xyes"])
dnl --------------------------
dnl Determine IS-IS I/O method
diff --git a/vtysh/Makefile.am b/vtysh/Makefile.am
index 7550173c..5c325ec2 100644
--- a/vtysh/Makefile.am
+++ b/vtysh/Makefile.am
@@ -33,7 +33,8 @@ vtysh_cmd_FILES = $(top_srcdir)/bgpd/*.c $(top_srcdir)/isisd/*.c \
$(top_srcdir)/zebra/irdp_interface.c \
$(top_srcdir)/zebra/rtadv.c $(top_srcdir)/zebra/zebra_vty.c \
$(top_srcdir)/zebra/zserv.c $(top_srcdir)/zebra/router-id.c \
- $(top_srcdir)/zebra/zebra_routemap.c
+ $(top_srcdir)/zebra/zebra_routemap.c \
+ $(top_srcdir)/zebra/zebra_fpm.c
vtysh_cmd.c: $(vtysh_cmd_FILES)
./$(EXTRA_DIST) $(vtysh_cmd_FILES) > vtysh_cmd.c
diff --git a/zebra/Makefile.am b/zebra/Makefile.am
index ea962bf4..266812f8 100644
--- a/zebra/Makefile.am
+++ b/zebra/Makefile.am
@@ -19,6 +19,10 @@ ioctl_method = @IOCTL_METHOD@
otherobj = $(ioctl_method) $(ipforward) $(if_method) $(if_proc) \
$(rt_method) $(rtread_method) $(kernel_method) $(other_method)
+if HAVE_NETLINK
+othersrc = zebra_fpm_netlink.c
+endif
+
AM_CFLAGS = $(PICFLAGS)
AM_LDFLAGS = $(PILDFLAGS)
@@ -29,7 +33,8 @@ noinst_PROGRAMS = testzebra
zebra_SOURCES = \
zserv.c main.c interface.c connected.c zebra_rib.c zebra_routemap.c \
redistribute.c debug.c rtadv.c zebra_snmp.c zebra_vty.c \
- irdp_main.c irdp_interface.c irdp_packet.c router-id.c
+ irdp_main.c irdp_interface.c irdp_packet.c router-id.c zebra_fpm.c \
+ $(othersrc)
testzebra_SOURCES = test_main.c zebra_rib.c interface.c connected.c debug.c \
zebra_vty.c \
diff --git a/zebra/debug.c b/zebra/debug.c
index 175029b8..7bfdb77d 100644
--- a/zebra/debug.c
+++ b/zebra/debug.c
@@ -29,6 +29,7 @@ unsigned long zebra_debug_event;
unsigned long zebra_debug_packet;
unsigned long zebra_debug_kernel;
unsigned long zebra_debug_rib;
+unsigned long zebra_debug_fpm;
DEFUN (show_debugging_zebra,
show_debugging_zebra_cmd,
@@ -71,6 +72,9 @@ DEFUN (show_debugging_zebra,
if (IS_ZEBRA_DEBUG_RIB_Q)
vty_out (vty, " Zebra RIB queue debugging is on%s", VTY_NEWLINE);
+ if (IS_ZEBRA_DEBUG_FPM)
+ vty_out (vty, " Zebra FPM debugging is on%s", VTY_NEWLINE);
+
return CMD_SUCCESS;
}
@@ -169,6 +173,17 @@ DEFUN (debug_zebra_rib_q,
return CMD_SUCCESS;
}
+DEFUN (debug_zebra_fpm,
+ debug_zebra_fpm_cmd,
+ "debug zebra fpm",
+ DEBUG_STR
+ "Zebra configuration\n"
+ "Debug zebra FPM events\n")
+{
+ SET_FLAG (zebra_debug_fpm, ZEBRA_DEBUG_FPM);
+ return CMD_SUCCESS;
+}
+
DEFUN (no_debug_zebra_events,
no_debug_zebra_events_cmd,
"no debug zebra events",
@@ -247,6 +262,18 @@ DEFUN (no_debug_zebra_rib_q,
return CMD_SUCCESS;
}
+DEFUN (no_debug_zebra_fpm,
+ no_debug_zebra_fpm_cmd,
+ "no debug zebra fpm",
+ NO_STR
+ DEBUG_STR
+ "Zebra configuration\n"
+ "Debug zebra FPM events\n")
+{
+ zebra_debug_fpm = 0;
+ return CMD_SUCCESS;
+}
+
/* Debug node. */
struct cmd_node debug_node =
{
@@ -302,6 +329,11 @@ config_write_debug (struct vty *vty)
vty_out (vty, "debug zebra rib queue%s", VTY_NEWLINE);
write++;
}
+ if (IS_ZEBRA_DEBUG_FPM)
+ {
+ vty_out (vty, "debug zebra fpm%s", VTY_NEWLINE);
+ write++;
+ }
return write;
}
@@ -312,6 +344,7 @@ zebra_debug_init (void)
zebra_debug_packet = 0;
zebra_debug_kernel = 0;
zebra_debug_rib = 0;
+ zebra_debug_fpm = 0;
install_node (&debug_node, config_write_debug);
@@ -325,11 +358,13 @@ zebra_debug_init (void)
install_element (ENABLE_NODE, &debug_zebra_kernel_cmd);
install_element (ENABLE_NODE, &debug_zebra_rib_cmd);
install_element (ENABLE_NODE, &debug_zebra_rib_q_cmd);
+ install_element (ENABLE_NODE, &debug_zebra_fpm_cmd);
install_element (ENABLE_NODE, &no_debug_zebra_events_cmd);
install_element (ENABLE_NODE, &no_debug_zebra_packet_cmd);
install_element (ENABLE_NODE, &no_debug_zebra_kernel_cmd);
install_element (ENABLE_NODE, &no_debug_zebra_rib_cmd);
install_element (ENABLE_NODE, &no_debug_zebra_rib_q_cmd);
+ install_element (ENABLE_NODE, &no_debug_zebra_fpm_cmd);
install_element (CONFIG_NODE, &debug_zebra_events_cmd);
install_element (CONFIG_NODE, &debug_zebra_packet_cmd);
@@ -338,9 +373,11 @@ zebra_debug_init (void)
install_element (CONFIG_NODE, &debug_zebra_kernel_cmd);
install_element (CONFIG_NODE, &debug_zebra_rib_cmd);
install_element (CONFIG_NODE, &debug_zebra_rib_q_cmd);
+ install_element (CONFIG_NODE, &debug_zebra_fpm_cmd);
install_element (CONFIG_NODE, &no_debug_zebra_events_cmd);
install_element (CONFIG_NODE, &no_debug_zebra_packet_cmd);
install_element (CONFIG_NODE, &no_debug_zebra_kernel_cmd);
install_element (CONFIG_NODE, &no_debug_zebra_rib_cmd);
install_element (CONFIG_NODE, &no_debug_zebra_rib_q_cmd);
+ install_element (CONFIG_NODE, &no_debug_zebra_fpm_cmd);
}
diff --git a/zebra/debug.h b/zebra/debug.h
index 9d9412bc..d9231a22 100644
--- a/zebra/debug.h
+++ b/zebra/debug.h
@@ -36,6 +36,8 @@
#define ZEBRA_DEBUG_RIB 0x01
#define ZEBRA_DEBUG_RIB_Q 0x02
+#define ZEBRA_DEBUG_FPM 0x01
+
/* Debug related macro. */
#define IS_ZEBRA_DEBUG_EVENT (zebra_debug_event & ZEBRA_DEBUG_EVENT)
@@ -49,10 +51,13 @@
#define IS_ZEBRA_DEBUG_RIB (zebra_debug_rib & ZEBRA_DEBUG_RIB)
#define IS_ZEBRA_DEBUG_RIB_Q (zebra_debug_rib & ZEBRA_DEBUG_RIB_Q)
+#define IS_ZEBRA_DEBUG_FPM (zebra_debug_fpm & ZEBRA_DEBUG_FPM)
+
extern unsigned long zebra_debug_event;
extern unsigned long zebra_debug_packet;
extern unsigned long zebra_debug_kernel;
extern unsigned long zebra_debug_rib;
+extern unsigned long zebra_debug_fpm;
extern void zebra_debug_init (void);
diff --git a/zebra/main.c b/zebra/main.c
index 50ac224e..742e0292 100644
--- a/zebra/main.c
+++ b/zebra/main.c
@@ -39,6 +39,7 @@
#include "zebra/router-id.h"
#include "zebra/irdp.h"
#include "zebra/rtadv.h"
+#include "zebra/zebra_fpm.h"
/* Zebra instance */
struct zebra_t zebrad =
@@ -349,6 +350,12 @@ main (int argc, char **argv)
zebra_snmp_init ();
#endif /* HAVE_SNMP */
+#ifdef HAVE_FPM
+ zfpm_init (zebrad.master, 1, 0);
+#else
+ zfpm_init (zebrad.master, 0, 0);
+#endif
+
/* Process the configuration file. Among other configuration
* directives we can meet those installing static routes. Such
* requests will not be executed immediately, but queued in
diff --git a/zebra/misc_null.c b/zebra/misc_null.c
index 73594301..c8cc47d1 100644
--- a/zebra/misc_null.c
+++ b/zebra/misc_null.c
@@ -4,8 +4,15 @@
#include "zebra/rtadv.h"
#include "zebra/irdp.h"
#include "zebra/interface.h"
+#include "zebra/zebra_fpm.h"
void ifstat_update_proc (void) { return; }
#pragma weak rtadv_config_write = ifstat_update_proc
#pragma weak irdp_config_write = ifstat_update_proc
#pragma weak ifstat_update_sysctl = ifstat_update_proc
+
+void
+zfpm_trigger_update (struct route_node *rn, const char *reason)
+{
+ return;
+}
diff --git a/zebra/rib.h b/zebra/rib.h
index 4ecfaa0d..e16ce68a 100644
--- a/zebra/rib.h
+++ b/zebra/rib.h
@@ -25,6 +25,7 @@
#include "prefix.h"
#include "table.h"
+#include "queue.h"
#define DISTANCE_INFINITY 255
@@ -116,6 +117,11 @@ typedef struct rib_dest_t_
*/
u_int32_t flags;
+ /*
+ * Linkage to put dest on the FPM processing queue.
+ */
+ TAILQ_ENTRY(rib_dest_t_) fpm_q_entries;
+
} rib_dest_t;
#define RIB_ROUTE_QUEUED(x) (1 << (x))
@@ -126,6 +132,18 @@ typedef struct rib_dest_t_
#define ZEBRA_MAX_QINDEX (MQ_SIZE - 1)
/*
+ * This flag indicates that a given prefix has been 'advertised' to
+ * the FPM to be installed in the forwarding plane.
+ */
+#define RIB_DEST_SENT_TO_FPM (1 << (ZEBRA_MAX_QINDEX + 1))
+
+/*
+ * This flag is set when we need to send an update to the FPM about a
+ * dest.
+ */
+#define RIB_DEST_UPDATE_FPM (1 << (ZEBRA_MAX_QINDEX + 2))
+
+/*
* Macro to iterate over each route for a destination (prefix).
*/
#define RIB_DEST_FOREACH_ROUTE(dest, rib) \
diff --git a/zebra/zebra_fpm.c b/zebra/zebra_fpm.c
new file mode 100644
index 00000000..e02d1745
--- /dev/null
+++ b/zebra/zebra_fpm.c
@@ -0,0 +1,1581 @@
+/*
+ * Main implementation file for interface to Forwarding Plane Manager.
+ *
+ * Copyright (C) 2012 by Open Source Routing.
+ * Copyright (C) 2012 by Internet Systems Consortium, Inc. ("ISC")
+ *
+ * This file is part of GNU Zebra.
+ *
+ * GNU Zebra is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * GNU Zebra is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Zebra; see the file COPYING. If not, write to the Free
+ * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+ * 02111-1307, USA.
+ */
+
+#include <zebra.h>
+
+#include "log.h"
+#include "stream.h"
+#include "thread.h"
+#include "network.h"
+#include "command.h"
+
+#include "zebra/rib.h"
+
+#include "fpm/fpm.h"
+#include "zebra_fpm.h"
+#include "zebra_fpm_private.h"
+
+/*
+ * Interval at which we attempt to connect to the FPM.
+ */
+#define ZFPM_CONNECT_RETRY_IVL 5
+
+/*
+ * Sizes of outgoing and incoming stream buffers for writing/reading
+ * FPM messages.
+ */
+#define ZFPM_OBUF_SIZE (2 * FPM_MAX_MSG_LEN)
+#define ZFPM_IBUF_SIZE (FPM_MAX_MSG_LEN)
+
+/*
+ * The maximum number of times the FPM socket write callback can call
+ * 'write' before it yields.
+ */
+#define ZFPM_MAX_WRITES_PER_RUN 10
+
+/*
+ * Interval over which we collect statistics.
+ */
+#define ZFPM_STATS_IVL_SECS 10
+
+/*
+ * Structure that holds state for iterating over all route_node
+ * structures that are candidates for being communicated to the FPM.
+ */
+typedef struct zfpm_rnodes_iter_t_
+{
+ rib_tables_iter_t tables_iter;
+ route_table_iter_t iter;
+} zfpm_rnodes_iter_t;
+
+/*
+ * Statistics.
+ */
+typedef struct zfpm_stats_t_ {
+ unsigned long connect_calls;
+ unsigned long connect_no_sock;
+
+ unsigned long read_cb_calls;
+
+ unsigned long write_cb_calls;
+ unsigned long write_calls;
+ unsigned long partial_writes;
+ unsigned long max_writes_hit;
+ unsigned long t_write_yields;
+
+ unsigned long nop_deletes_skipped;
+ unsigned long route_adds;
+ unsigned long route_dels;
+
+ unsigned long updates_triggered;
+ unsigned long redundant_triggers;
+ unsigned long non_fpm_table_triggers;
+
+ unsigned long dests_del_after_update;
+
+ unsigned long t_conn_down_starts;
+ unsigned long t_conn_down_dests_processed;
+ unsigned long t_conn_down_yields;
+ unsigned long t_conn_down_finishes;
+
+ unsigned long t_conn_up_starts;
+ unsigned long t_conn_up_dests_processed;
+ unsigned long t_conn_up_yields;
+ unsigned long t_conn_up_aborts;
+ unsigned long t_conn_up_finishes;
+
+} zfpm_stats_t;
+
+/*
+ * States for the FPM state machine.
+ */
+typedef enum {
+
+ /*
+ * In this state we are not yet ready to connect to the FPM. This
+ * can happen when this module is disabled, or if we're cleaning up
+ * after a connection has gone down.
+ */
+ ZFPM_STATE_IDLE,
+
+ /*
+ * Ready to talk to the FPM and periodically trying to connect to
+ * it.
+ */
+ ZFPM_STATE_ACTIVE,
+
+ /*
+ * In the middle of bringing up a TCP connection. Specifically,
+ * waiting for a connect() call to complete asynchronously.
+ */
+ ZFPM_STATE_CONNECTING,
+
+ /*
+ * TCP connection to the FPM is up.
+ */
+ ZFPM_STATE_ESTABLISHED
+
+} zfpm_state_t;
+
+/*
+ * Globals.
+ */
+typedef struct zfpm_glob_t_
+{
+
+ /*
+ * True if the FPM module has been enabled.
+ */
+ int enabled;
+
+ struct thread_master *master;
+
+ zfpm_state_t state;
+
+ /*
+ * Port on which the FPM is running.
+ */
+ int fpm_port;
+
+ /*
+ * List of rib_dest_t structures to be processed
+ */
+ TAILQ_HEAD (zfpm_dest_q, rib_dest_t_) dest_q;
+
+ /*
+ * Stream socket to the FPM.
+ */
+ int sock;
+
+ /*
+ * Buffers for messages to/from the FPM.
+ */
+ struct stream *obuf;
+ struct stream *ibuf;
+
+ /*
+ * Threads for I/O.
+ */
+ struct thread *t_connect;
+ struct thread *t_write;
+ struct thread *t_read;
+
+ /*
+ * Thread to clean up after the TCP connection to the FPM goes down
+ * and the state that belongs to it.
+ */
+ struct thread *t_conn_down;
+
+ struct {
+ zfpm_rnodes_iter_t iter;
+ } t_conn_down_state;
+
+ /*
+ * Thread to take actions once the TCP conn to the FPM comes up, and
+ * the state that belongs to it.
+ */
+ struct thread *t_conn_up;
+
+ struct {
+ zfpm_rnodes_iter_t iter;
+ } t_conn_up_state;
+
+ unsigned long connect_calls;
+ time_t last_connect_call_time;
+
+ /*
+ * Stats from the start of the current statistics interval up to
+ * now. These are the counters we typically update in the code.
+ */
+ zfpm_stats_t stats;
+
+ /*
+ * Statistics that were gathered in the last collection interval.
+ */
+ zfpm_stats_t last_ivl_stats;
+
+ /*
+ * Cumulative stats from the last clear to the start of the current
+ * statistics interval.
+ */
+ zfpm_stats_t cumulative_stats;
+
+ /*
+ * Stats interval timer.
+ */
+ struct thread *t_stats;
+
+ /*
+ * If non-zero, the last time when statistics were cleared.
+ */
+ time_t last_stats_clear_time;
+
+} zfpm_glob_t;
+
+static zfpm_glob_t zfpm_glob_space;
+static zfpm_glob_t *zfpm_g = &zfpm_glob_space;
+
+static int zfpm_read_cb (struct thread *thread);
+static int zfpm_write_cb (struct thread *thread);
+
+static void zfpm_set_state (zfpm_state_t state, const char *reason);
+static void zfpm_start_connect_timer (const char *reason);
+static void zfpm_start_stats_timer (void);
+
+/*
+ * zfpm_thread_should_yield
+ */
+static inline int
+zfpm_thread_should_yield (struct thread *t)
+{
+ return thread_should_yield (t);
+}
+
+/*
+ * zfpm_state_to_str
+ */
+static const char *
+zfpm_state_to_str (zfpm_state_t state)
+{
+ switch (state)
+ {
+
+ case ZFPM_STATE_IDLE:
+ return "idle";
+
+ case ZFPM_STATE_ACTIVE:
+ return "active";
+
+ case ZFPM_STATE_CONNECTING:
+ return "connecting";
+
+ case ZFPM_STATE_ESTABLISHED:
+ return "established";
+
+ default:
+ return "unknown";
+ }
+}
+
+/*
+ * zfpm_get_time
+ */
+static time_t
+zfpm_get_time (void)
+{
+ struct timeval tv;
+
+ if (quagga_gettime (QUAGGA_CLK_MONOTONIC, &tv) < 0)
+ zlog_warn ("FPM: quagga_gettime failed!!");
+
+ return tv.tv_sec;
+}
+
+/*
+ * zfpm_get_elapsed_time
+ *
+ * Returns the time elapsed (in seconds) since the given time.
+ */
+static time_t
+zfpm_get_elapsed_time (time_t reference)
+{
+ time_t now;
+
+ now = zfpm_get_time ();
+
+ if (now < reference)
+ {
+ assert (0);
+ return 0;
+ }
+
+ return now - reference;
+}
+
+/*
+ * zfpm_is_table_for_fpm
+ *
+ * Returns TRUE if the the given table is to be communicated to the
+ * FPM.
+ */
+static inline int
+zfpm_is_table_for_fpm (struct route_table *table)
+{
+ rib_table_info_t *info;
+
+ info = rib_table_info (table);
+
+ /*
+ * We only send the unicast tables in the main instance to the FPM
+ * at this point.
+ */
+ if (info->vrf->id != 0)
+ return 0;
+
+ if (info->safi != SAFI_UNICAST)
+ return 0;
+
+ return 1;
+}
+
+/*
+ * zfpm_rnodes_iter_init
+ */
+static inline void
+zfpm_rnodes_iter_init (zfpm_rnodes_iter_t *iter)
+{
+ memset (iter, 0, sizeof (*iter));
+ rib_tables_iter_init (&iter->tables_iter);
+
+ /*
+ * This is a hack, but it makes implementing 'next' easier by
+ * ensuring that route_table_iter_next() will return NULL the first
+ * time we call it.
+ */
+ route_table_iter_init (&iter->iter, NULL);
+ route_table_iter_cleanup (&iter->iter);
+}
+
+/*
+ * zfpm_rnodes_iter_next
+ */
+static inline struct route_node *
+zfpm_rnodes_iter_next (zfpm_rnodes_iter_t *iter)
+{
+ struct route_node *rn;
+ struct route_table *table;
+
+ while (1)
+ {
+ rn = route_table_iter_next (&iter->iter);
+ if (rn)
+ return rn;
+
+ /*
+ * We've made our way through this table, go to the next one.
+ */
+ route_table_iter_cleanup (&iter->iter);
+
+ while ((table = rib_tables_iter_next (&iter->tables_iter)))
+ {
+ if (zfpm_is_table_for_fpm (table))
+ break;
+ }
+
+ if (!table)
+ return NULL;
+
+ route_table_iter_init (&iter->iter, table);
+ }
+
+ return NULL;
+}
+
+/*
+ * zfpm_rnodes_iter_pause
+ */
+static inline void
+zfpm_rnodes_iter_pause (zfpm_rnodes_iter_t *iter)
+{
+ route_table_iter_pause (&iter->iter);
+}
+
+/*
+ * zfpm_rnodes_iter_cleanup
+ */
+static inline void
+zfpm_rnodes_iter_cleanup (zfpm_rnodes_iter_t *iter)
+{
+ route_table_iter_cleanup (&iter->iter);
+ rib_tables_iter_cleanup (&iter->tables_iter);
+}
+
+/*
+ * zfpm_stats_init
+ *
+ * Initialize a statistics block.
+ */
+static inline void
+zfpm_stats_init (zfpm_stats_t *stats)
+{
+ memset (stats, 0, sizeof (*stats));
+}
+
+/*
+ * zfpm_stats_reset
+ */
+static inline void
+zfpm_stats_reset (zfpm_stats_t *stats)
+{
+ zfpm_stats_init (stats);
+}
+
+/*
+ * zfpm_stats_copy
+ */
+static inline void
+zfpm_stats_copy (const zfpm_stats_t *src, zfpm_stats_t *dest)
+{
+ memcpy (dest, src, sizeof (*dest));
+}
+
+/*
+ * zfpm_stats_compose
+ *
+ * Total up the statistics in two stats structures ('s1 and 's2') and
+ * return the result in the third argument, 'result'. Note that the
+ * pointer 'result' may be the same as 's1' or 's2'.
+ *
+ * For simplicity, the implementation below assumes that the stats
+ * structure is composed entirely of counters. This can easily be
+ * changed when necessary.
+ */
+static void
+zfpm_stats_compose (const zfpm_stats_t *s1, const zfpm_stats_t *s2,
+ zfpm_stats_t *result)
+{
+ const unsigned long *p1, *p2;
+ unsigned long *result_p;
+ int i, num_counters;
+
+ p1 = (const unsigned long *) s1;
+ p2 = (const unsigned long *) s2;
+ result_p = (unsigned long *) result;
+
+ num_counters = (sizeof (zfpm_stats_t) / sizeof (unsigned long));
+
+ for (i = 0; i < num_counters; i++)
+ {
+ result_p[i] = p1[i] + p2[i];
+ }
+}
+
+/*
+ * zfpm_read_on
+ */
+static inline void
+zfpm_read_on (void)
+{
+ assert (!zfpm_g->t_read);
+ assert (zfpm_g->sock >= 0);
+
+ THREAD_READ_ON (zfpm_g->master, zfpm_g->t_read, zfpm_read_cb, 0,
+ zfpm_g->sock);
+}
+
+/*
+ * zfpm_write_on
+ */
+static inline void
+zfpm_write_on (void)
+{
+ assert (!zfpm_g->t_write);
+ assert (zfpm_g->sock >= 0);
+
+ THREAD_WRITE_ON (zfpm_g->master, zfpm_g->t_write, zfpm_write_cb, 0,
+ zfpm_g->sock);
+}
+
+/*
+ * zfpm_read_off
+ */
+static inline void
+zfpm_read_off (void)
+{
+ THREAD_READ_OFF (zfpm_g->t_read);
+}
+
+/*
+ * zfpm_write_off
+ */
+static inline void
+zfpm_write_off (void)
+{
+ THREAD_WRITE_OFF (zfpm_g->t_write);
+}
+
+/*
+ * zfpm_conn_up_thread_cb
+ *
+ * Callback for actions to be taken when the connection to the FPM
+ * comes up.
+ */
+static int
+zfpm_conn_up_thread_cb (struct thread *thread)
+{
+ struct route_node *rnode;
+ zfpm_rnodes_iter_t *iter;
+ rib_dest_t *dest;
+
+ assert (zfpm_g->t_conn_up);
+ zfpm_g->t_conn_up = NULL;
+
+ iter = &zfpm_g->t_conn_up_state.iter;
+
+ if (zfpm_g->state != ZFPM_STATE_ESTABLISHED)
+ {
+ zfpm_debug ("Connection not up anymore, conn_up thread aborting");
+ zfpm_g->stats.t_conn_up_aborts++;
+ goto done;
+ }
+
+ while ((rnode = zfpm_rnodes_iter_next (iter)))
+ {
+ dest = rib_dest_from_rnode (rnode);
+
+ if (dest)
+ {
+ zfpm_g->stats.t_conn_up_dests_processed++;
+ zfpm_trigger_update (rnode, NULL);
+ }
+
+ /*
+ * Yield if need be.
+ */
+ if (!zfpm_thread_should_yield (thread))
+ continue;
+
+ zfpm_g->stats.t_conn_up_yields++;
+ zfpm_rnodes_iter_pause (iter);
+ zfpm_g->t_conn_up = thread_add_background (zfpm_g->master,
+ zfpm_conn_up_thread_cb,
+ 0, 0);
+ return 0;
+ }
+
+ zfpm_g->stats.t_conn_up_finishes++;
+
+ done:
+ zfpm_rnodes_iter_cleanup (iter);
+ return 0;
+}
+
+/*
+ * zfpm_connection_up
+ *
+ * Called when the connection to the FPM comes up.
+ */
+static void
+zfpm_connection_up (const char *detail)
+{
+ assert (zfpm_g->sock >= 0);
+ zfpm_read_on ();
+ zfpm_write_on ();
+ zfpm_set_state (ZFPM_STATE_ESTABLISHED, detail);
+
+ /*
+ * Start thread to push existing routes to the FPM.
+ */
+ assert (!zfpm_g->t_conn_up);
+
+ zfpm_rnodes_iter_init (&zfpm_g->t_conn_up_state.iter);
+
+ zfpm_debug ("Starting conn_up thread");
+ zfpm_g->t_conn_up = thread_add_background (zfpm_g->master,
+ zfpm_conn_up_thread_cb, 0, 0);
+ zfpm_g->stats.t_conn_up_starts++;
+}
+
+/*
+ * zfpm_connect_check
+ *
+ * Check if an asynchronous connect() to the FPM is complete.
+ */
+static void
+zfpm_connect_check ()
+{
+ int status;
+ socklen_t slen;
+ int ret;
+
+ zfpm_read_off ();
+ zfpm_write_off ();
+
+ slen = sizeof (status);
+ ret = getsockopt (zfpm_g->sock, SOL_SOCKET, SO_ERROR, (void *) &status,
+ &slen);
+
+ if (ret >= 0 && status == 0)
+ {
+ zfpm_connection_up ("async connect complete");
+ return;
+ }
+
+ /*
+ * getsockopt() failed or indicated an error on the socket.
+ */
+ close (zfpm_g->sock);
+ zfpm_g->sock = -1;
+
+ zfpm_start_connect_timer ("getsockopt() after async connect failed");
+ return;
+}
+
+/*
+ * zfpm_conn_down_thread_cb
+ *
+ * Callback that is invoked to clean up state after the TCP connection
+ * to the FPM goes down.
+ */
+static int
+zfpm_conn_down_thread_cb (struct thread *thread)
+{
+ struct route_node *rnode;
+ zfpm_rnodes_iter_t *iter;
+ rib_dest_t *dest;
+
+ assert (zfpm_g->state == ZFPM_STATE_IDLE);
+
+ assert (zfpm_g->t_conn_down);
+ zfpm_g->t_conn_down = NULL;
+
+ iter = &zfpm_g->t_conn_down_state.iter;
+
+ while ((rnode = zfpm_rnodes_iter_next (iter)))
+ {
+ dest = rib_dest_from_rnode (rnode);
+
+ if (dest)
+ {
+ if (CHECK_FLAG (dest->flags, RIB_DEST_UPDATE_FPM))
+ {
+ TAILQ_REMOVE (&zfpm_g->dest_q, dest, fpm_q_entries);
+ }
+
+ UNSET_FLAG (dest->flags, RIB_DEST_UPDATE_FPM);
+ UNSET_FLAG (dest->flags, RIB_DEST_SENT_TO_FPM);
+
+ zfpm_g->stats.t_conn_down_dests_processed++;
+
+ /*
+ * Check if the dest should be deleted.
+ */
+ rib_gc_dest(rnode);
+ }
+
+ /*
+ * Yield if need be.
+ */
+ if (!zfpm_thread_should_yield (thread))
+ continue;
+
+ zfpm_g->stats.t_conn_down_yields++;
+ zfpm_rnodes_iter_pause (iter);
+ zfpm_g->t_conn_down = thread_add_background (zfpm_g->master,
+ zfpm_conn_down_thread_cb,
+ 0, 0);
+ return 0;
+ }
+
+ zfpm_g->stats.t_conn_down_finishes++;
+ zfpm_rnodes_iter_cleanup (iter);
+
+ /*
+ * Start the process of connecting to the FPM again.
+ */
+ zfpm_start_connect_timer ("cleanup complete");
+ return 0;
+}
+
+/*
+ * zfpm_connection_down
+ *
+ * Called when the connection to the FPM has gone down.
+ */
+static void
+zfpm_connection_down (const char *detail)
+{
+ if (!detail)
+ detail = "unknown";
+
+ assert (zfpm_g->state == ZFPM_STATE_ESTABLISHED);
+
+ zlog_info ("connection to the FPM has gone down: %s", detail);
+
+ zfpm_read_off ();
+ zfpm_write_off ();
+
+ stream_reset (zfpm_g->ibuf);
+ stream_reset (zfpm_g->obuf);
+
+ if (zfpm_g->sock >= 0) {
+ close (zfpm_g->sock);
+ zfpm_g->sock = -1;
+ }
+
+ /*
+ * Start thread to clean up state after the connection goes down.
+ */
+ assert (!zfpm_g->t_conn_down);
+ zfpm_debug ("Starting conn_down thread");
+ zfpm_rnodes_iter_init (&zfpm_g->t_conn_down_state.iter);
+ zfpm_g->t_conn_down = thread_add_background (zfpm_g->master,
+ zfpm_conn_down_thread_cb, 0, 0);
+ zfpm_g->stats.t_conn_down_starts++;
+
+ zfpm_set_state (ZFPM_STATE_IDLE, detail);
+}
+
+/*
+ * zfpm_read_cb
+ */
+static int
+zfpm_read_cb (struct thread *thread)
+{
+ size_t already;
+ struct stream *ibuf;
+ uint16_t msg_len;
+ fpm_msg_hdr_t *hdr;
+
+ zfpm_g->stats.read_cb_calls++;
+ assert (zfpm_g->t_read);
+ zfpm_g->t_read = NULL;
+
+ /*
+ * Check if async connect is now done.
+ */
+ if (zfpm_g->state == ZFPM_STATE_CONNECTING)
+ {
+ zfpm_connect_check();
+ return 0;
+ }
+
+ assert (zfpm_g->state == ZFPM_STATE_ESTABLISHED);
+ assert (zfpm_g->sock >= 0);
+
+ ibuf = zfpm_g->ibuf;
+
+ already = stream_get_endp (ibuf);
+ if (already < FPM_MSG_HDR_LEN)
+ {
+ ssize_t nbyte;
+
+ nbyte = stream_read_try (ibuf, zfpm_g->sock, FPM_MSG_HDR_LEN - already);
+ if (nbyte == 0 || nbyte == -1)
+ {
+ zfpm_connection_down ("closed socket in read");
+ return 0;
+ }
+
+ if (nbyte != (ssize_t) (FPM_MSG_HDR_LEN - already))
+ goto done;
+
+ already = FPM_MSG_HDR_LEN;
+ }
+
+ stream_set_getp (ibuf, 0);
+
+ hdr = (fpm_msg_hdr_t *) stream_pnt (ibuf);
+
+ if (!fpm_msg_hdr_ok (hdr))
+ {
+ zfpm_connection_down ("invalid message header");
+ return 0;
+ }
+
+ msg_len = fpm_msg_len (hdr);
+
+ /*
+ * Read out the rest of the packet.
+ */
+ if (already < msg_len)
+ {
+ ssize_t nbyte;
+
+ nbyte = stream_read_try (ibuf, zfpm_g->sock, msg_len - already);
+
+ if (nbyte == 0 || nbyte == -1)
+ {
+ zfpm_connection_down ("failed to read message");
+ return 0;
+ }
+
+ if (nbyte != (ssize_t) (msg_len - already))
+ goto done;
+ }
+
+ zfpm_debug ("Read out a full fpm message");
+
+ /*
+ * Just throw it away for now.
+ */
+ stream_reset (ibuf);
+
+ done:
+ zfpm_read_on ();
+ return 0;
+}
+
+/*
+ * zfpm_writes_pending
+ *
+ * Returns TRUE if we may have something to write to the FPM.
+ */
+static int
+zfpm_writes_pending (void)
+{
+
+ /*
+ * Check if there is any data in the outbound buffer that has not
+ * been written to the socket yet.
+ */
+ if (stream_get_endp (zfpm_g->obuf) - stream_get_getp (zfpm_g->obuf))
+ return 1;
+
+ /*
+ * Check if there are any prefixes on the outbound queue.
+ */
+ if (!TAILQ_EMPTY (&zfpm_g->dest_q))
+ return 1;
+
+ return 0;
+}
+
+/*
+ * zfpm_encode_route
+ *
+ * Encode a message to the FPM with information about the given route.
+ *
+ * Returns the number of bytes written to the buffer. 0 or a negative
+ * value indicates an error.
+ */
+static inline int
+zfpm_encode_route (rib_dest_t *dest, struct rib *rib, char *in_buf,
+ size_t in_buf_len)
+{
+#ifndef HAVE_NETLINK
+ return 0;
+#else
+
+ int cmd;
+
+ cmd = rib ? RTM_NEWROUTE : RTM_DELROUTE;
+
+ return zfpm_netlink_encode_route (cmd, dest, rib, in_buf, in_buf_len);
+
+#endif /* HAVE_NETLINK */
+}
+
+/*
+ * zfpm_route_for_update
+ *
+ * Returns the rib that is to be sent to the FPM for a given dest.
+ */
+static struct rib *
+zfpm_route_for_update (rib_dest_t *dest)
+{
+ struct rib *rib;
+
+ RIB_DEST_FOREACH_ROUTE (dest, rib)
+ {
+ if (!CHECK_FLAG (rib->flags, ZEBRA_FLAG_SELECTED))
+ continue;
+
+ return rib;
+ }
+
+ /*
+ * We have no route for this destination.
+ */
+ return NULL;
+}
+
+/*
+ * zfpm_build_updates
+ *
+ * Process the outgoing queue and write messages to the outbound
+ * buffer.
+ */
+static void
+zfpm_build_updates (void)
+{
+ struct stream *s;
+ rib_dest_t *dest;
+ unsigned char *buf, *data, *buf_end;
+ size_t msg_len;
+ size_t data_len;
+ fpm_msg_hdr_t *hdr;
+ struct rib *rib;
+ int is_add, write_msg;
+
+ s = zfpm_g->obuf;
+
+ assert (stream_empty (s));
+
+ do {
+
+ /*
+ * Make sure there is enough space to write another message.
+ */
+ if (STREAM_WRITEABLE (s) < FPM_MAX_MSG_LEN)
+ break;
+
+ buf = STREAM_DATA (s) + stream_get_endp (s);
+ buf_end = buf + STREAM_WRITEABLE (s);
+
+ dest = TAILQ_FIRST (&zfpm_g->dest_q);
+ if (!dest)
+ break;
+
+ assert (CHECK_FLAG (dest->flags, RIB_DEST_UPDATE_FPM));
+
+ hdr = (fpm_msg_hdr_t *) buf;
+ hdr->version = FPM_PROTO_VERSION;
+ hdr->msg_type = FPM_MSG_TYPE_NETLINK;
+
+ data = fpm_msg_data (hdr);
+
+ rib = zfpm_route_for_update (dest);
+ is_add = rib ? 1 : 0;
+
+ write_msg = 1;
+
+ /*
+ * If this is a route deletion, and we have not sent the route to
+ * the FPM previously, skip it.
+ */
+ if (!is_add && !CHECK_FLAG (dest->flags, RIB_DEST_SENT_TO_FPM))
+ {
+ write_msg = 0;
+ zfpm_g->stats.nop_deletes_skipped++;
+ }
+
+ if (write_msg) {
+ data_len = zfpm_encode_route (dest, rib, (char *) data, buf_end - data);
+
+ assert (data_len);
+ if (data_len)
+ {
+ msg_len = fpm_data_len_to_msg_len (data_len);
+ hdr->msg_len = htons (msg_len);
+ stream_forward_endp (s, msg_len);
+
+ if (is_add)
+ zfpm_g->stats.route_adds++;
+ else
+ zfpm_g->stats.route_dels++;
+ }
+ }
+
+ /*
+ * Remove the dest from the queue, and reset the flag.
+ */
+ UNSET_FLAG (dest->flags, RIB_DEST_UPDATE_FPM);
+ TAILQ_REMOVE (&zfpm_g->dest_q, dest, fpm_q_entries);
+
+ if (is_add)
+ {
+ SET_FLAG (dest->flags, RIB_DEST_SENT_TO_FPM);
+ }
+ else
+ {
+ UNSET_FLAG (dest->flags, RIB_DEST_SENT_TO_FPM);
+ }
+
+ /*
+ * Delete the destination if necessary.
+ */
+ if (rib_gc_dest (dest->rnode))
+ zfpm_g->stats.dests_del_after_update++;
+
+ } while (1);
+
+}
+
+/*
+ * zfpm_write_cb
+ */
+static int
+zfpm_write_cb (struct thread *thread)
+{
+ struct stream *s;
+ int num_writes;
+
+ zfpm_g->stats.write_cb_calls++;
+ assert (zfpm_g->t_write);
+ zfpm_g->t_write = NULL;
+
+ /*
+ * Check if async connect is now done.
+ */
+ if (zfpm_g->state == ZFPM_STATE_CONNECTING)
+ {
+ zfpm_connect_check ();
+ return 0;
+ }
+
+ assert (zfpm_g->state == ZFPM_STATE_ESTABLISHED);
+ assert (zfpm_g->sock >= 0);
+
+ num_writes = 0;
+
+ do
+ {
+ int bytes_to_write, bytes_written;
+
+ s = zfpm_g->obuf;
+
+ /*
+ * If the stream is empty, try fill it up with data.
+ */
+ if (stream_empty (s))
+ {
+ zfpm_build_updates ();
+ }
+
+ bytes_to_write = stream_get_endp (s) - stream_get_getp (s);
+ if (!bytes_to_write)
+ break;
+
+ bytes_written = write (zfpm_g->sock, STREAM_PNT (s), bytes_to_write);
+ zfpm_g->stats.write_calls++;
+ num_writes++;
+
+ if (bytes_written < 0)
+ {
+ if (ERRNO_IO_RETRY (errno))
+ break;
+
+ zfpm_connection_down ("failed to write to socket");
+ return 0;
+ }
+
+ if (bytes_written != bytes_to_write)
+ {
+
+ /*
+ * Partial write.
+ */
+ stream_forward_getp (s, bytes_written);
+ zfpm_g->stats.partial_writes++;
+ break;
+ }
+
+ /*
+ * We've written out the entire contents of the stream.
+ */
+ stream_reset (s);
+
+ if (num_writes >= ZFPM_MAX_WRITES_PER_RUN)
+ {
+ zfpm_g->stats.max_writes_hit++;
+ break;
+ }
+
+ if (zfpm_thread_should_yield (thread))
+ {
+ zfpm_g->stats.t_write_yields++;
+ break;
+ }
+ } while (1);
+
+ if (zfpm_writes_pending ())
+ zfpm_write_on ();
+
+ return 0;
+}
+
+/*
+ * zfpm_connect_cb
+ */
+static int
+zfpm_connect_cb (struct thread *t)
+{
+ int sock, ret;
+ struct sockaddr_in serv;
+
+ assert (zfpm_g->t_connect);
+ zfpm_g->t_connect = NULL;
+ assert (zfpm_g->state == ZFPM_STATE_ACTIVE);
+
+ sock = socket (AF_INET, SOCK_STREAM, 0);
+ if (sock < 0)
+ {
+ zfpm_debug ("Failed to create socket for connect(): %s", strerror(errno));
+ zfpm_g->stats.connect_no_sock++;
+ return 0;
+ }
+
+ set_nonblocking(sock);
+
+ /* Make server socket. */
+ memset (&serv, 0, sizeof (serv));
+ serv.sin_family = AF_INET;
+ serv.sin_port = htons (zfpm_g->fpm_port);
+#ifdef HAVE_STRUCT_SOCKADDR_IN_SIN_LEN
+ serv.sin_len = sizeof (struct sockaddr_in);
+#endif /* HAVE_STRUCT_SOCKADDR_IN_SIN_LEN */
+ serv.sin_addr.s_addr = htonl (INADDR_LOOPBACK);
+
+ /*
+ * Connect to the FPM.
+ */
+ zfpm_g->connect_calls++;
+ zfpm_g->stats.connect_calls++;
+ zfpm_g->last_connect_call_time = zfpm_get_time ();
+
+ ret = connect (sock, (struct sockaddr *) &serv, sizeof (serv));
+ if (ret >= 0)
+ {
+ zfpm_g->sock = sock;
+ zfpm_connection_up ("connect succeeded");
+ return 1;
+ }
+
+ if (errno == EINPROGRESS)
+ {
+ zfpm_g->sock = sock;
+ zfpm_read_on ();
+ zfpm_write_on ();
+ zfpm_set_state (ZFPM_STATE_CONNECTING, "async connect in progress");
+ return 0;
+ }
+
+ zlog_info ("can't connect to FPM %d: %s", sock, safe_strerror (errno));
+ close (sock);
+
+ /*
+ * Restart timer for retrying connection.
+ */
+ zfpm_start_connect_timer ("connect() failed");
+ return 0;
+}
+
+/*
+ * zfpm_set_state
+ *
+ * Move state machine into the given state.
+ */
+static void
+zfpm_set_state (zfpm_state_t state, const char *reason)
+{
+ zfpm_state_t cur_state = zfpm_g->state;
+
+ if (!reason)
+ reason = "Unknown";
+
+ if (state == cur_state)
+ return;
+
+ zfpm_debug("beginning state transition %s -> %s. Reason: %s",
+ zfpm_state_to_str (cur_state), zfpm_state_to_str (state),
+ reason);
+
+ switch (state) {
+
+ case ZFPM_STATE_IDLE:
+ assert (cur_state == ZFPM_STATE_ESTABLISHED);
+ break;
+
+ case ZFPM_STATE_ACTIVE:
+ assert (cur_state == ZFPM_STATE_IDLE ||
+ cur_state == ZFPM_STATE_CONNECTING);
+ assert (zfpm_g->t_connect);
+ break;
+
+ case ZFPM_STATE_CONNECTING:
+ assert (zfpm_g->sock);
+ assert (cur_state == ZFPM_STATE_ACTIVE);
+ assert (zfpm_g->t_read);
+ assert (zfpm_g->t_write);
+ break;
+
+ case ZFPM_STATE_ESTABLISHED:
+ assert (cur_state == ZFPM_STATE_ACTIVE ||
+ cur_state == ZFPM_STATE_CONNECTING);
+ assert (zfpm_g->sock);
+ assert (zfpm_g->t_read);
+ assert (zfpm_g->t_write);
+ break;
+ }
+
+ zfpm_g->state = state;
+}
+
+/*
+ * zfpm_calc_connect_delay
+ *
+ * Returns the number of seconds after which we should attempt to
+ * reconnect to the FPM.
+ */
+static long
+zfpm_calc_connect_delay (void)
+{
+ time_t elapsed;
+
+ /*
+ * Return 0 if this is our first attempt to connect.
+ */
+ if (zfpm_g->connect_calls == 0)
+ {
+ return 0;
+ }
+
+ elapsed = zfpm_get_elapsed_time (zfpm_g->last_connect_call_time);
+
+ if (elapsed > ZFPM_CONNECT_RETRY_IVL) {
+ return 0;
+ }
+
+ return ZFPM_CONNECT_RETRY_IVL - elapsed;
+}
+
+/*
+ * zfpm_start_connect_timer
+ */
+static void
+zfpm_start_connect_timer (const char *reason)
+{
+ long delay_secs;
+
+ assert (!zfpm_g->t_connect);
+ assert (zfpm_g->sock < 0);
+
+ assert(zfpm_g->state == ZFPM_STATE_IDLE ||
+ zfpm_g->state == ZFPM_STATE_ACTIVE ||
+ zfpm_g->state == ZFPM_STATE_CONNECTING);
+
+ delay_secs = zfpm_calc_connect_delay();
+ zfpm_debug ("scheduling connect in %ld seconds", delay_secs);
+
+ THREAD_TIMER_ON (zfpm_g->master, zfpm_g->t_connect, zfpm_connect_cb, 0,
+ delay_secs);
+ zfpm_set_state (ZFPM_STATE_ACTIVE, reason);
+}
+
+/*
+ * zfpm_is_enabled
+ *
+ * Returns TRUE if the zebra FPM module has been enabled.
+ */
+static inline int
+zfpm_is_enabled (void)
+{
+ return zfpm_g->enabled;
+}
+
+/*
+ * zfpm_conn_is_up
+ *
+ * Returns TRUE if the connection to the FPM is up.
+ */
+static inline int
+zfpm_conn_is_up (void)
+{
+ if (zfpm_g->state != ZFPM_STATE_ESTABLISHED)
+ return 0;
+
+ assert (zfpm_g->sock >= 0);
+
+ return 1;
+}
+
+/*
+ * zfpm_trigger_update
+ *
+ * The zebra code invokes this function to indicate that we should
+ * send an update to the FPM about the given route_node.
+ */
+void
+zfpm_trigger_update (struct route_node *rn, const char *reason)
+{
+ rib_dest_t *dest;
+ char buf[INET6_ADDRSTRLEN];
+
+ /*
+ * Ignore if the connection is down. We will update the FPM about
+ * all destinations once the connection comes up.
+ */
+ if (!zfpm_conn_is_up ())
+ return;
+
+ dest = rib_dest_from_rnode (rn);
+
+ /*
+ * Ignore the trigger if the dest is not in a table that we would
+ * send to the FPM.
+ */
+ if (!zfpm_is_table_for_fpm (rib_dest_table (dest)))
+ {
+ zfpm_g->stats.non_fpm_table_triggers++;
+ return;
+ }
+
+ if (CHECK_FLAG (dest->flags, RIB_DEST_UPDATE_FPM)) {
+ zfpm_g->stats.redundant_triggers++;
+ return;
+ }
+
+ if (reason)
+ {
+ zfpm_debug ("%s/%d triggering update to FPM - Reason: %s",
+ inet_ntop (rn->p.family, &rn->p.u.prefix, buf, sizeof (buf)),
+ rn->p.prefixlen, reason);
+ }
+
+ SET_FLAG (dest->flags, RIB_DEST_UPDATE_FPM);
+ TAILQ_INSERT_TAIL (&zfpm_g->dest_q, dest, fpm_q_entries);
+ zfpm_g->stats.updates_triggered++;
+
+ /*
+ * Make sure that writes are enabled.
+ */
+ if (zfpm_g->t_write)
+ return;
+
+ zfpm_write_on ();
+}
+
+/*
+ * zfpm_stats_timer_cb
+ */
+static int
+zfpm_stats_timer_cb (struct thread *t)
+{
+ assert (zfpm_g->t_stats);
+ zfpm_g->t_stats = NULL;
+
+ /*
+ * Remember the stats collected in the last interval for display
+ * purposes.
+ */
+ zfpm_stats_copy (&zfpm_g->stats, &zfpm_g->last_ivl_stats);
+
+ /*
+ * Add the current set of stats into the cumulative statistics.
+ */
+ zfpm_stats_compose (&zfpm_g->cumulative_stats, &zfpm_g->stats,
+ &zfpm_g->cumulative_stats);
+
+ /*
+ * Start collecting stats afresh over the next interval.
+ */
+ zfpm_stats_reset (&zfpm_g->stats);
+
+ zfpm_start_stats_timer ();
+
+ return 0;
+}
+
+/*
+ * zfpm_stop_stats_timer
+ */
+static void
+zfpm_stop_stats_timer (void)
+{
+ if (!zfpm_g->t_stats)
+ return;
+
+ zfpm_debug ("Stopping existing stats timer");
+ THREAD_TIMER_OFF (zfpm_g->t_stats);
+}
+
+/*
+ * zfpm_start_stats_timer
+ */
+void
+zfpm_start_stats_timer (void)
+{
+ assert (!zfpm_g->t_stats);
+
+ THREAD_TIMER_ON (zfpm_g->master, zfpm_g->t_stats, zfpm_stats_timer_cb, 0,
+ ZFPM_STATS_IVL_SECS);
+}
+
+/*
+ * Helper macro for zfpm_show_stats() below.
+ */
+#define ZFPM_SHOW_STAT(counter) \
+ do { \
+ vty_out (vty, "%-40s %10lu %16lu%s", #counter, total_stats.counter, \
+ zfpm_g->last_ivl_stats.counter, VTY_NEWLINE); \
+ } while (0)
+
+/*
+ * zfpm_show_stats
+ */
+static void
+zfpm_show_stats (struct vty *vty)
+{
+ zfpm_stats_t total_stats;
+ time_t elapsed;
+
+ vty_out (vty, "%s%-40s %10s Last %2d secs%s%s", VTY_NEWLINE, "Counter",
+ "Total", ZFPM_STATS_IVL_SECS, VTY_NEWLINE, VTY_NEWLINE);
+
+ /*
+ * Compute the total stats up to this instant.
+ */
+ zfpm_stats_compose (&zfpm_g->cumulative_stats, &zfpm_g->stats,
+ &total_stats);
+
+ ZFPM_SHOW_STAT (connect_calls);
+ ZFPM_SHOW_STAT (connect_no_sock);
+ ZFPM_SHOW_STAT (read_cb_calls);
+ ZFPM_SHOW_STAT (write_cb_calls);
+ ZFPM_SHOW_STAT (write_calls);
+ ZFPM_SHOW_STAT (partial_writes);
+ ZFPM_SHOW_STAT (max_writes_hit);
+ ZFPM_SHOW_STAT (t_write_yields);
+ ZFPM_SHOW_STAT (nop_deletes_skipped);
+ ZFPM_SHOW_STAT (route_adds);
+ ZFPM_SHOW_STAT (route_dels);
+ ZFPM_SHOW_STAT (updates_triggered);
+ ZFPM_SHOW_STAT (non_fpm_table_triggers);
+ ZFPM_SHOW_STAT (redundant_triggers);
+ ZFPM_SHOW_STAT (dests_del_after_update);
+ ZFPM_SHOW_STAT (t_conn_down_starts);
+ ZFPM_SHOW_STAT (t_conn_down_dests_processed);
+ ZFPM_SHOW_STAT (t_conn_down_yields);
+ ZFPM_SHOW_STAT (t_conn_down_finishes);
+ ZFPM_SHOW_STAT (t_conn_up_starts);
+ ZFPM_SHOW_STAT (t_conn_up_dests_processed);
+ ZFPM_SHOW_STAT (t_conn_up_yields);
+ ZFPM_SHOW_STAT (t_conn_up_aborts);
+ ZFPM_SHOW_STAT (t_conn_up_finishes);
+
+ if (!zfpm_g->last_stats_clear_time)
+ return;
+
+ elapsed = zfpm_get_elapsed_time (zfpm_g->last_stats_clear_time);
+
+ vty_out (vty, "%sStats were cleared %lu seconds ago%s", VTY_NEWLINE,
+ (unsigned long) elapsed, VTY_NEWLINE);
+}
+
+/*
+ * zfpm_clear_stats
+ */
+static void
+zfpm_clear_stats (struct vty *vty)
+{
+ if (!zfpm_is_enabled ())
+ {
+ vty_out (vty, "The FPM module is not enabled...%s", VTY_NEWLINE);
+ return;
+ }
+
+ zfpm_stats_reset (&zfpm_g->stats);
+ zfpm_stats_reset (&zfpm_g->last_ivl_stats);
+ zfpm_stats_reset (&zfpm_g->cumulative_stats);
+
+ zfpm_stop_stats_timer ();
+ zfpm_start_stats_timer ();
+
+ zfpm_g->last_stats_clear_time = zfpm_get_time();
+
+ vty_out (vty, "Cleared FPM stats%s", VTY_NEWLINE);
+}
+
+/*
+ * show_zebra_fpm_stats
+ */
+DEFUN (show_zebra_fpm_stats,
+ show_zebra_fpm_stats_cmd,
+ "show zebra fpm stats",
+ SHOW_STR
+ "Zebra information\n"
+ "Forwarding Path Manager information\n"
+ "Statistics\n")
+{
+ zfpm_show_stats (vty);
+ return CMD_SUCCESS;
+}
+
+/*
+ * clear_zebra_fpm_stats
+ */
+DEFUN (clear_zebra_fpm_stats,
+ clear_zebra_fpm_stats_cmd,
+ "clear zebra fpm stats",
+ CLEAR_STR
+ "Zebra information\n"
+ "Clear Forwarding Path Manager information\n"
+ "Statistics\n")
+{
+ zfpm_clear_stats (vty);
+ return CMD_SUCCESS;
+}
+
+/**
+ * zfpm_init
+ *
+ * One-time initialization of the Zebra FPM module.
+ *
+ * @param[in] port port at which FPM is running.
+ * @param[in] enable TRUE if the zebra FPM module should be enabled
+ *
+ * Returns TRUE on success.
+ */
+int
+zfpm_init (struct thread_master *master, int enable, uint16_t port)
+{
+ static int initialized = 0;
+
+ if (initialized) {
+ return 1;
+ }
+
+ initialized = 1;
+
+ memset (zfpm_g, 0, sizeof (*zfpm_g));
+ zfpm_g->master = master;
+ TAILQ_INIT(&zfpm_g->dest_q);
+ zfpm_g->sock = -1;
+ zfpm_g->state = ZFPM_STATE_IDLE;
+
+ /*
+ * Netlink must currently be available for the Zebra-FPM interface
+ * to be enabled.
+ */
+#ifndef HAVE_NETLINK
+ enable = 0;
+#endif
+
+ zfpm_g->enabled = enable;
+
+ zfpm_stats_init (&zfpm_g->stats);
+ zfpm_stats_init (&zfpm_g->last_ivl_stats);
+ zfpm_stats_init (&zfpm_g->cumulative_stats);
+
+ install_element (ENABLE_NODE, &show_zebra_fpm_stats_cmd);
+ install_element (ENABLE_NODE, &clear_zebra_fpm_stats_cmd);
+
+ if (!enable) {
+ return 1;
+ }
+
+ if (!port)
+ port = FPM_DEFAULT_PORT;
+
+ zfpm_g->fpm_port = port;
+
+ zfpm_g->obuf = stream_new (ZFPM_OBUF_SIZE);
+ zfpm_g->ibuf = stream_new (ZFPM_IBUF_SIZE);
+
+ zfpm_start_stats_timer ();
+ zfpm_start_connect_timer ("initialized");
+
+ return 1;
+}
diff --git a/zebra/zebra_fpm.h b/zebra/zebra_fpm.h
new file mode 100644
index 00000000..44dec028
--- /dev/null
+++ b/zebra/zebra_fpm.h
@@ -0,0 +1,34 @@
+/*
+ * Header file exported by the zebra FPM module to zebra.
+ *
+ * Copyright (C) 2012 by Open Source Routing.
+ * Copyright (C) 2012 by Internet Systems Consortium, Inc. ("ISC")
+ *
+ * This file is part of GNU Zebra.
+ *
+ * GNU Zebra is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * GNU Zebra is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Zebra; see the file COPYING. If not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#ifndef _ZEBRA_FPM_H
+#define _ZEBRA_FPM_H
+
+/*
+ * Externs.
+ */
+extern int zfpm_init (struct thread_master *master, int enable, uint16_t port);
+extern void zfpm_trigger_update (struct route_node *rn, const char *reason);
+
+#endif /* _ZEBRA_FPM_H */
diff --git a/zebra/zebra_fpm_netlink.c b/zebra/zebra_fpm_netlink.c
new file mode 100644
index 00000000..90d3afb2
--- /dev/null
+++ b/zebra/zebra_fpm_netlink.c
@@ -0,0 +1,552 @@
+/*
+ * Code for encoding/decoding FPM messages that are in netlink format.
+ *
+ * Copyright (C) 1997, 98, 99 Kunihiro Ishiguro
+ * Copyright (C) 2012 by Open Source Routing.
+ * Copyright (C) 2012 by Internet Systems Consortium, Inc. ("ISC")
+ *
+ * This file is part of GNU Zebra.
+ *
+ * GNU Zebra is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * GNU Zebra is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Zebra; see the file COPYING. If not, write to the Free
+ * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+ * 02111-1307, USA.
+ */
+
+#include <zebra.h>
+
+#include "log.h"
+#include "rib.h"
+
+#include "rt_netlink.h"
+
+#include "zebra_fpm_private.h"
+
+/*
+ * addr_to_a
+ *
+ * Returns string representation of an address of the given AF.
+ */
+static inline const char *
+addr_to_a (u_char af, void *addr)
+{
+ if (!addr)
+ return "<No address>";
+
+ switch (af)
+ {
+
+ case AF_INET:
+ return inet_ntoa (*((struct in_addr *) addr));
+
+#ifdef HAVE_IPV6
+ case AF_INET6:
+ return inet6_ntoa (*((struct in6_addr *) addr));
+#endif
+
+ default:
+ return "<Addr in unknown AF>";
+ }
+}
+
+/*
+ * prefix_addr_to_a
+ *
+ * Convience wrapper that returns a human-readable string for the
+ * address in a prefix.
+ */
+static const char *
+prefix_addr_to_a (struct prefix *prefix)
+{
+ if (!prefix)
+ return "<No address>";
+
+ return addr_to_a (prefix->family, &prefix->u.prefix);
+}
+
+/*
+ * af_addr_size
+ *
+ * The size of an address in a given address family.
+ */
+static size_t
+af_addr_size (u_char af)
+{
+ switch (af)
+ {
+
+ case AF_INET:
+ return 4;
+
+#ifdef HAVE_IPV6
+ case AF_INET6:
+ return 16;
+#endif
+
+ default:
+ assert(0);
+ return 16;
+ }
+}
+
+/*
+ * netlink_nh_info_t
+ *
+ * Holds information about a single nexthop for netlink. These info
+ * structures are transient and may contain pointers into rib
+ * data structures for convenience.
+ */
+typedef struct netlink_nh_info_t_
+{
+ uint32_t if_index;
+ union g_addr *gateway;
+
+ /*
+ * Information from the struct nexthop from which this nh was
+ * derived. For debug purposes only.
+ */
+ int recursive;
+ enum nexthop_types_t type;
+} netlink_nh_info_t;
+
+/*
+ * netlink_route_info_t
+ *
+ * A structure for holding information for a netlink route message.
+ */
+typedef struct netlink_route_info_t_
+{
+ uint16_t nlmsg_type;
+ u_char rtm_type;
+ uint32_t rtm_table;
+ u_char rtm_protocol;
+ u_char af;
+ struct prefix *prefix;
+ uint32_t *metric;
+ int num_nhs;
+
+ /*
+ * Nexthop structures. We keep things simple for now by enforcing a
+ * maximum of 64 in case MULTIPATH_NUM is 0;
+ */
+ netlink_nh_info_t nhs[MAX (MULTIPATH_NUM, 64)];
+ union g_addr *pref_src;
+} netlink_route_info_t;
+
+/*
+ * netlink_route_info_add_nh
+ *
+ * Add information about the given nexthop to the given route info
+ * structure.
+ *
+ * Returns TRUE if a nexthop was added, FALSE otherwise.
+ */
+static int
+netlink_route_info_add_nh (netlink_route_info_t *ri, struct nexthop *nexthop)
+{
+ netlink_nh_info_t nhi;
+ union g_addr *src;
+
+ memset (&nhi, 0, sizeof (nhi));
+ src = NULL;
+
+ if (ri->num_nhs >= (int) ZEBRA_NUM_OF (ri->nhs))
+ return 0;
+
+ if (CHECK_FLAG (nexthop->flags, NEXTHOP_FLAG_RECURSIVE))
+ {
+ nhi.recursive = 1;
+ nhi.type = nexthop->rtype;
+
+ if (nexthop->rtype == NEXTHOP_TYPE_IPV4
+ || nexthop->rtype == NEXTHOP_TYPE_IPV4_IFINDEX)
+ {
+ nhi.gateway = &nexthop->rgate;
+ if (nexthop->src.ipv4.s_addr)
+ src = &nexthop->src;
+ }
+
+#ifdef HAVE_IPV6
+ if (nexthop->rtype == NEXTHOP_TYPE_IPV6
+ || nexthop->rtype == NEXTHOP_TYPE_IPV6_IFINDEX
+ || nexthop->rtype == NEXTHOP_TYPE_IPV6_IFNAME)
+ {
+ nhi.gateway = &nexthop->rgate;
+ }
+#endif /* HAVE_IPV6 */
+
+ if (nexthop->rtype == NEXTHOP_TYPE_IFINDEX
+ || nexthop->rtype == NEXTHOP_TYPE_IFNAME
+ || nexthop->rtype == NEXTHOP_TYPE_IPV4_IFINDEX
+ || nexthop->rtype == NEXTHOP_TYPE_IPV6_IFINDEX
+ || nexthop->rtype == NEXTHOP_TYPE_IPV6_IFNAME)
+ {
+ nhi.if_index = nexthop->rifindex;
+ if ((nexthop->rtype == NEXTHOP_TYPE_IPV4_IFINDEX
+ || nexthop->rtype == NEXTHOP_TYPE_IFINDEX)
+ && nexthop->src.ipv4.s_addr)
+ src = &nexthop->src;
+ }
+
+ goto done;
+ }
+
+ nhi.recursive = 0;
+ nhi.type = nexthop->type;
+
+ if (nexthop->type == NEXTHOP_TYPE_IPV4
+ || nexthop->type == NEXTHOP_TYPE_IPV4_IFINDEX)
+ {
+ nhi.gateway = &nexthop->gate;
+ if (nexthop->src.ipv4.s_addr)
+ src = &nexthop->src;
+ }
+
+#ifdef HAVE_IPV6
+ if (nexthop->type == NEXTHOP_TYPE_IPV6
+ || nexthop->type == NEXTHOP_TYPE_IPV6_IFNAME
+ || nexthop->type == NEXTHOP_TYPE_IPV6_IFINDEX)
+ {
+ nhi.gateway = &nexthop->gate;
+ }
+#endif /* HAVE_IPV6 */
+ if (nexthop->type == NEXTHOP_TYPE_IFINDEX
+ || nexthop->type == NEXTHOP_TYPE_IFNAME
+ || nexthop->type == NEXTHOP_TYPE_IPV4_IFINDEX)
+ {
+ nhi.if_index = nexthop->ifindex;
+
+ if (nexthop->src.ipv4.s_addr)
+ src = &nexthop->src;
+ }
+ else if (nexthop->type == NEXTHOP_TYPE_IPV6_IFINDEX
+ || nexthop->type == NEXTHOP_TYPE_IPV6_IFNAME)
+ {
+ nhi.if_index = nexthop->ifindex;
+ }
+
+ /*
+ * Fall through...
+ */
+
+ done:
+ if (!nhi.gateway && nhi.if_index == 0)
+ return 0;
+
+ /*
+ * We have a valid nhi. Copy the structure over to the route_info.
+ */
+ ri->nhs[ri->num_nhs] = nhi;
+ ri->num_nhs++;
+
+ if (src && !ri->pref_src)
+ ri->pref_src = src;
+
+ return 1;
+}
+
+/*
+ * netlink_proto_from_route_type
+ */
+static u_char
+netlink_proto_from_route_type (int type)
+{
+ switch (type)
+ {
+ case ZEBRA_ROUTE_KERNEL:
+ case ZEBRA_ROUTE_CONNECT:
+ return RTPROT_KERNEL;
+
+ default:
+ return RTPROT_ZEBRA;
+ }
+}
+
+/*
+ * netlink_route_info_fill
+ *
+ * Fill out the route information object from the given route.
+ *
+ * Returns TRUE on success and FALSE on failure.
+ */
+static int
+netlink_route_info_fill (netlink_route_info_t *ri, int cmd,
+ rib_dest_t *dest, struct rib *rib)
+{
+ struct nexthop *nexthop = NULL;
+ int discard;
+
+ memset (ri, 0, sizeof (*ri));
+
+ ri->prefix = rib_dest_prefix (dest);
+ ri->af = rib_dest_af (dest);
+
+ ri->nlmsg_type = cmd;
+ ri->rtm_table = rib_dest_vrf (dest)->id;
+ ri->rtm_protocol = RTPROT_UNSPEC;
+
+ /*
+ * An RTM_DELROUTE need not be accompanied by any nexthops,
+ * particularly in our communication with the FPM.
+ */
+ if (cmd == RTM_DELROUTE && !rib)
+ goto skip;
+
+ if (rib)
+ ri->rtm_protocol = netlink_proto_from_route_type (rib->type);
+
+ if ((rib->flags & ZEBRA_FLAG_BLACKHOLE) || (rib->flags & ZEBRA_FLAG_REJECT))
+ discard = 1;
+ else
+ discard = 0;
+
+ if (cmd == RTM_NEWROUTE)
+ {
+ if (discard)
+ {
+ if (rib->flags & ZEBRA_FLAG_BLACKHOLE)
+ ri->rtm_type = RTN_BLACKHOLE;
+ else if (rib->flags & ZEBRA_FLAG_REJECT)
+ ri->rtm_type = RTN_UNREACHABLE;
+ else
+ assert (0);
+ }
+ else
+ ri->rtm_type = RTN_UNICAST;
+ }
+
+ ri->metric = &rib->metric;
+
+ if (discard)
+ {
+ goto skip;
+ }
+
+ /* Multipath case. */
+ if (rib->nexthop_active_num == 1 || MULTIPATH_NUM == 1)
+ {
+ for (nexthop = rib->nexthop; nexthop; nexthop = nexthop->next)
+ {
+
+ if ((cmd == RTM_NEWROUTE
+ && CHECK_FLAG (nexthop->flags, NEXTHOP_FLAG_ACTIVE))
+ || (cmd == RTM_DELROUTE
+ && CHECK_FLAG (nexthop->flags, NEXTHOP_FLAG_FIB)))
+ {
+ netlink_route_info_add_nh (ri, nexthop);
+ break;
+ }
+ }
+ }
+ else
+ {
+ for (nexthop = rib->nexthop;
+ nexthop && (MULTIPATH_NUM == 0 || ri->num_nhs < MULTIPATH_NUM);
+ nexthop = nexthop->next)
+ {
+ if ((cmd == RTM_NEWROUTE
+ && CHECK_FLAG (nexthop->flags, NEXTHOP_FLAG_ACTIVE))
+ || (cmd == RTM_DELROUTE
+ && CHECK_FLAG (nexthop->flags, NEXTHOP_FLAG_FIB)))
+ {
+ netlink_route_info_add_nh (ri, nexthop);
+ }
+ }
+ }
+
+ /* If there is no useful nexthop then return. */
+ if (ri->num_nhs == 0)
+ {
+ zfpm_debug ("netlink_encode_route(): No useful nexthop.");
+ return 0;
+ }
+
+ skip:
+ return 1;
+}
+
+/*
+ * netlink_route_info_encode
+ *
+ * Returns the number of bytes written to the buffer. 0 or a negative
+ * value indicates an error.
+ */
+static int
+netlink_route_info_encode (netlink_route_info_t *ri, char *in_buf,
+ size_t in_buf_len)
+{
+ int bytelen;
+ int nexthop_num = 0;
+ size_t buf_offset;
+ netlink_nh_info_t *nhi;
+
+ struct
+ {
+ struct nlmsghdr n;
+ struct rtmsg r;
+ char buf[1];
+ } *req;
+
+ req = (void *) in_buf;
+
+ buf_offset = ((char *) req->buf) - ((char *) req);
+
+ if (in_buf_len < buf_offset) {
+ assert(0);
+ return 0;
+ }
+
+ memset (req, 0, buf_offset);
+
+ bytelen = af_addr_size (ri->af);
+
+ req->n.nlmsg_len = NLMSG_LENGTH (sizeof (struct rtmsg));
+ req->n.nlmsg_flags = NLM_F_CREATE | NLM_F_REQUEST;
+ req->n.nlmsg_type = ri->nlmsg_type;
+ req->r.rtm_family = ri->af;
+ req->r.rtm_table = ri->rtm_table;
+ req->r.rtm_dst_len = ri->prefix->prefixlen;
+ req->r.rtm_protocol = ri->rtm_protocol;
+ req->r.rtm_scope = RT_SCOPE_UNIVERSE;
+
+ addattr_l (&req->n, in_buf_len, RTA_DST, &ri->prefix->u.prefix, bytelen);
+
+ req->r.rtm_type = ri->rtm_type;
+
+ /* Metric. */
+ if (ri->metric)
+ addattr32 (&req->n, in_buf_len, RTA_PRIORITY, *ri->metric);
+
+ if (ri->num_nhs == 0)
+ goto done;
+
+ if (ri->num_nhs == 1)
+ {
+ nhi = &ri->nhs[0];
+
+ if (nhi->gateway)
+ {
+ addattr_l (&req->n, in_buf_len, RTA_GATEWAY, nhi->gateway,
+ bytelen);
+ }
+
+ if (nhi->if_index)
+ {
+ addattr32 (&req->n, in_buf_len, RTA_OIF, nhi->if_index);
+ }
+
+ goto done;
+
+ }
+
+ /*
+ * Multipath case.
+ */
+ char buf[NL_PKT_BUF_SIZE];
+ struct rtattr *rta = (void *) buf;
+ struct rtnexthop *rtnh;
+
+ rta->rta_type = RTA_MULTIPATH;
+ rta->rta_len = RTA_LENGTH (0);
+ rtnh = RTA_DATA (rta);
+
+ for (nexthop_num = 0; nexthop_num < ri->num_nhs; nexthop_num++)
+ {
+ nhi = &ri->nhs[nexthop_num];
+
+ rtnh->rtnh_len = sizeof (*rtnh);
+ rtnh->rtnh_flags = 0;
+ rtnh->rtnh_hops = 0;
+ rtnh->rtnh_ifindex = 0;
+ rta->rta_len += rtnh->rtnh_len;
+
+ if (nhi->gateway)
+ {
+ rta_addattr_l (rta, sizeof (buf), RTA_GATEWAY, nhi->gateway, bytelen);
+ rtnh->rtnh_len += sizeof (struct rtattr) + bytelen;
+ }
+
+ if (nhi->if_index)
+ {
+ rtnh->rtnh_ifindex = nhi->if_index;
+ }
+
+ rtnh = RTNH_NEXT (rtnh);
+ }
+
+ assert (rta->rta_len > RTA_LENGTH (0));
+ addattr_l (&req->n, in_buf_len, RTA_MULTIPATH, RTA_DATA (rta),
+ RTA_PAYLOAD (rta));
+
+done:
+
+ if (ri->pref_src)
+ {
+ addattr_l (&req->n, in_buf_len, RTA_PREFSRC, &ri->pref_src, bytelen);
+ }
+
+ assert (req->n.nlmsg_len < in_buf_len);
+ return req->n.nlmsg_len;
+}
+
+/*
+ * zfpm_log_route_info
+ *
+ * Helper function to log the information in a route_info structure.
+ */
+static void
+zfpm_log_route_info (netlink_route_info_t *ri, const char *label)
+{
+ netlink_nh_info_t *nhi;
+ int i;
+
+ zfpm_debug ("%s : %s %s/%d, Proto: %s, Metric: %u", label,
+ nl_msg_type_to_str (ri->nlmsg_type),
+ prefix_addr_to_a (ri->prefix), ri->prefix->prefixlen,
+ nl_rtproto_to_str (ri->rtm_protocol),
+ ri->metric ? *ri->metric : 0);
+
+ for (i = 0; i < ri->num_nhs; i++)
+ {
+ nhi = &ri->nhs[i];
+ zfpm_debug(" Intf: %u, Gateway: %s, Recursive: %s, Type: %s",
+ nhi->if_index, addr_to_a (ri->af, nhi->gateway),
+ nhi->recursive ? "yes" : "no",
+ nexthop_type_to_str (nhi->type));
+ }
+}
+
+/*
+ * zfpm_netlink_encode_route
+ *
+ * Create a netlink message corresponding to the given route in the
+ * given buffer space.
+ *
+ * Returns the number of bytes written to the buffer. 0 or a negative
+ * value indicates an error.
+ */
+int
+zfpm_netlink_encode_route (int cmd, rib_dest_t *dest, struct rib *rib,
+ char *in_buf, size_t in_buf_len)
+{
+ netlink_route_info_t ri_space, *ri;
+
+ ri = &ri_space;
+
+ if (!netlink_route_info_fill (ri, cmd, dest, rib))
+ return 0;
+
+ zfpm_log_route_info (ri, __FUNCTION__);
+
+ return netlink_route_info_encode (ri, in_buf, in_buf_len);
+}
diff --git a/zebra/zebra_fpm_private.h b/zebra/zebra_fpm_private.h
new file mode 100644
index 00000000..809a70a4
--- /dev/null
+++ b/zebra/zebra_fpm_private.h
@@ -0,0 +1,56 @@
+/*
+ * Private header file for the zebra FPM module.
+ *
+ * Copyright (C) 2012 by Open Source Routing.
+ * Copyright (C) 2012 by Internet Systems Consortium, Inc. ("ISC")
+ *
+ * This file is part of GNU Zebra.
+ *
+ * GNU Zebra is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * GNU Zebra is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Zebra; see the file COPYING. If not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#ifndef _ZEBRA_FPM_PRIVATE_H
+#define _ZEBRA_FPM_PRIVATE_H
+
+#include "zebra/debug.h"
+
+#if defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L
+
+#define zfpm_debug(...) \
+ do { \
+ if (IS_ZEBRA_DEBUG_FPM) zlog_debug("FPM: " __VA_ARGS__); \
+ } while(0)
+
+#elif defined __GNUC__
+
+#define zfpm_debug(_args...) \
+ do { \
+ if (IS_ZEBRA_DEBUG_FPM) zlog_debug("FPM: " _args); \
+ } while(0)
+
+#else
+static inline void zfpm_debug(const char *format, ...) { return; }
+#endif
+
+
+/*
+ * Externs
+ */
+extern int
+zfpm_netlink_encode_route (int cmd, rib_dest_t *dest, struct rib *rib,
+ char *in_buf, size_t in_buf_len);
+
+#endif /* _ZEBRA_FPM_PRIVATE_H */
diff --git a/zebra/zebra_rib.c b/zebra/zebra_rib.c
index 29977123..a75d7215 100644
--- a/zebra/zebra_rib.c
+++ b/zebra/zebra_rib.c
@@ -40,6 +40,7 @@
#include "zebra/zserv.h"
#include "zebra/redistribute.h"
#include "zebra/debug.h"
+#include "zebra/zebra_fpm.h"
/* Default rtm_table for all clients */
extern struct zebra_t zebrad;
@@ -961,6 +962,11 @@ rib_install_kernel (struct route_node *rn, struct rib *rib)
int ret = 0;
struct nexthop *nexthop;
+ /*
+ * Make sure we update the FPM any time we send new information to
+ * the kernel.
+ */
+ zfpm_trigger_update (rn, "installing in kernel");
switch (PREFIX_FAMILY (&rn->p))
{
case AF_INET:
@@ -988,6 +994,12 @@ rib_uninstall_kernel (struct route_node *rn, struct rib *rib)
int ret = 0;
struct nexthop *nexthop;
+ /*
+ * Make sure we update the FPM any time we send new information to
+ * the kernel.
+ */
+ zfpm_trigger_update (rn, "uninstalling from kernel");
+
switch (PREFIX_FAMILY (&rn->p))
{
case AF_INET:
@@ -1012,6 +1024,8 @@ rib_uninstall (struct route_node *rn, struct rib *rib)
{
if (CHECK_FLAG (rib->flags, ZEBRA_FLAG_SELECTED))
{
+ zfpm_trigger_update (rn, "rib_uninstall");
+
redistribute_delete (&rn->p, rib);
if (! RIB_SYSTEM_ROUTE (rib))
rib_uninstall_kernel (rn, rib);
@@ -1034,6 +1048,14 @@ rib_can_delete_dest (rib_dest_t *dest)
return 0;
}
+ /*
+ * Don't delete the dest if we have to update the FPM about this
+ * prefix.
+ */
+ if (CHECK_FLAG (dest->flags, RIB_DEST_UPDATE_FPM) ||
+ CHECK_FLAG (dest->flags, RIB_DEST_SENT_TO_FPM))
+ return 0;
+
return 1;
}
@@ -1187,6 +1209,8 @@ rib_process (struct route_node *rn)
__func__, buf, rn->p.prefixlen, select, fib);
if (CHECK_FLAG (select->flags, ZEBRA_FLAG_CHANGED))
{
+ zfpm_trigger_update (rn, "updating existing route");
+
redistribute_delete (&rn->p, select);
if (! RIB_SYSTEM_ROUTE (select))
rib_uninstall_kernel (rn, select);
@@ -1228,6 +1252,9 @@ rib_process (struct route_node *rn)
if (IS_ZEBRA_DEBUG_RIB)
zlog_debug ("%s: %s/%d: Removing existing route, fib %p", __func__,
buf, rn->p.prefixlen, fib);
+
+ zfpm_trigger_update (rn, "removing existing route");
+
redistribute_delete (&rn->p, fib);
if (! RIB_SYSTEM_ROUTE (fib))
rib_uninstall_kernel (rn, fib);
@@ -1246,6 +1273,9 @@ rib_process (struct route_node *rn)
if (IS_ZEBRA_DEBUG_RIB)
zlog_debug ("%s: %s/%d: Adding route, select %p", __func__, buf,
rn->p.prefixlen, select);
+
+ zfpm_trigger_update (rn, "new route selected");
+
/* Set real nexthop. */
nexthop_active_update (rn, select, 1);
@@ -3081,6 +3111,8 @@ rib_close_table (struct route_table *table)
if (!CHECK_FLAG (rib->flags, ZEBRA_FLAG_SELECTED))
continue;
+ zfpm_trigger_update (rn, NULL);
+
if (! RIB_SYSTEM_ROUTE (rib))
rib_uninstall_kernel (rn, rib);
}