diff options
Diffstat (limited to 'zebra')
-rw-r--r-- | zebra/Makefile.am | 7 | ||||
-rw-r--r-- | zebra/debug.c | 37 | ||||
-rw-r--r-- | zebra/debug.h | 5 | ||||
-rw-r--r-- | zebra/main.c | 7 | ||||
-rw-r--r-- | zebra/misc_null.c | 7 | ||||
-rw-r--r-- | zebra/rib.h | 18 | ||||
-rw-r--r-- | zebra/zebra_fpm.c | 1581 | ||||
-rw-r--r-- | zebra/zebra_fpm.h | 34 | ||||
-rw-r--r-- | zebra/zebra_fpm_netlink.c | 552 | ||||
-rw-r--r-- | zebra/zebra_fpm_private.h | 56 | ||||
-rw-r--r-- | zebra/zebra_rib.c | 32 |
11 files changed, 2335 insertions, 1 deletions
diff --git a/zebra/Makefile.am b/zebra/Makefile.am index ea962bf4..266812f8 100644 --- a/zebra/Makefile.am +++ b/zebra/Makefile.am @@ -19,6 +19,10 @@ ioctl_method = @IOCTL_METHOD@ otherobj = $(ioctl_method) $(ipforward) $(if_method) $(if_proc) \ $(rt_method) $(rtread_method) $(kernel_method) $(other_method) +if HAVE_NETLINK +othersrc = zebra_fpm_netlink.c +endif + AM_CFLAGS = $(PICFLAGS) AM_LDFLAGS = $(PILDFLAGS) @@ -29,7 +33,8 @@ noinst_PROGRAMS = testzebra zebra_SOURCES = \ zserv.c main.c interface.c connected.c zebra_rib.c zebra_routemap.c \ redistribute.c debug.c rtadv.c zebra_snmp.c zebra_vty.c \ - irdp_main.c irdp_interface.c irdp_packet.c router-id.c + irdp_main.c irdp_interface.c irdp_packet.c router-id.c zebra_fpm.c \ + $(othersrc) testzebra_SOURCES = test_main.c zebra_rib.c interface.c connected.c debug.c \ zebra_vty.c \ diff --git a/zebra/debug.c b/zebra/debug.c index 175029b8..7bfdb77d 100644 --- a/zebra/debug.c +++ b/zebra/debug.c @@ -29,6 +29,7 @@ unsigned long zebra_debug_event; unsigned long zebra_debug_packet; unsigned long zebra_debug_kernel; unsigned long zebra_debug_rib; +unsigned long zebra_debug_fpm; DEFUN (show_debugging_zebra, show_debugging_zebra_cmd, @@ -71,6 +72,9 @@ DEFUN (show_debugging_zebra, if (IS_ZEBRA_DEBUG_RIB_Q) vty_out (vty, " Zebra RIB queue debugging is on%s", VTY_NEWLINE); + if (IS_ZEBRA_DEBUG_FPM) + vty_out (vty, " Zebra FPM debugging is on%s", VTY_NEWLINE); + return CMD_SUCCESS; } @@ -169,6 +173,17 @@ DEFUN (debug_zebra_rib_q, return CMD_SUCCESS; } +DEFUN (debug_zebra_fpm, + debug_zebra_fpm_cmd, + "debug zebra fpm", + DEBUG_STR + "Zebra configuration\n" + "Debug zebra FPM events\n") +{ + SET_FLAG (zebra_debug_fpm, ZEBRA_DEBUG_FPM); + return CMD_SUCCESS; +} + DEFUN (no_debug_zebra_events, no_debug_zebra_events_cmd, "no debug zebra events", @@ -247,6 +262,18 @@ DEFUN (no_debug_zebra_rib_q, return CMD_SUCCESS; } +DEFUN (no_debug_zebra_fpm, + no_debug_zebra_fpm_cmd, + "no debug zebra fpm", + NO_STR + DEBUG_STR + "Zebra configuration\n" + "Debug zebra FPM events\n") +{ + zebra_debug_fpm = 0; + return CMD_SUCCESS; +} + /* Debug node. */ struct cmd_node debug_node = { @@ -302,6 +329,11 @@ config_write_debug (struct vty *vty) vty_out (vty, "debug zebra rib queue%s", VTY_NEWLINE); write++; } + if (IS_ZEBRA_DEBUG_FPM) + { + vty_out (vty, "debug zebra fpm%s", VTY_NEWLINE); + write++; + } return write; } @@ -312,6 +344,7 @@ zebra_debug_init (void) zebra_debug_packet = 0; zebra_debug_kernel = 0; zebra_debug_rib = 0; + zebra_debug_fpm = 0; install_node (&debug_node, config_write_debug); @@ -325,11 +358,13 @@ zebra_debug_init (void) install_element (ENABLE_NODE, &debug_zebra_kernel_cmd); install_element (ENABLE_NODE, &debug_zebra_rib_cmd); install_element (ENABLE_NODE, &debug_zebra_rib_q_cmd); + install_element (ENABLE_NODE, &debug_zebra_fpm_cmd); install_element (ENABLE_NODE, &no_debug_zebra_events_cmd); install_element (ENABLE_NODE, &no_debug_zebra_packet_cmd); install_element (ENABLE_NODE, &no_debug_zebra_kernel_cmd); install_element (ENABLE_NODE, &no_debug_zebra_rib_cmd); install_element (ENABLE_NODE, &no_debug_zebra_rib_q_cmd); + install_element (ENABLE_NODE, &no_debug_zebra_fpm_cmd); install_element (CONFIG_NODE, &debug_zebra_events_cmd); install_element (CONFIG_NODE, &debug_zebra_packet_cmd); @@ -338,9 +373,11 @@ zebra_debug_init (void) install_element (CONFIG_NODE, &debug_zebra_kernel_cmd); install_element (CONFIG_NODE, &debug_zebra_rib_cmd); install_element (CONFIG_NODE, &debug_zebra_rib_q_cmd); + install_element (CONFIG_NODE, &debug_zebra_fpm_cmd); install_element (CONFIG_NODE, &no_debug_zebra_events_cmd); install_element (CONFIG_NODE, &no_debug_zebra_packet_cmd); install_element (CONFIG_NODE, &no_debug_zebra_kernel_cmd); install_element (CONFIG_NODE, &no_debug_zebra_rib_cmd); install_element (CONFIG_NODE, &no_debug_zebra_rib_q_cmd); + install_element (CONFIG_NODE, &no_debug_zebra_fpm_cmd); } diff --git a/zebra/debug.h b/zebra/debug.h index 9d9412bc..d9231a22 100644 --- a/zebra/debug.h +++ b/zebra/debug.h @@ -36,6 +36,8 @@ #define ZEBRA_DEBUG_RIB 0x01 #define ZEBRA_DEBUG_RIB_Q 0x02 +#define ZEBRA_DEBUG_FPM 0x01 + /* Debug related macro. */ #define IS_ZEBRA_DEBUG_EVENT (zebra_debug_event & ZEBRA_DEBUG_EVENT) @@ -49,10 +51,13 @@ #define IS_ZEBRA_DEBUG_RIB (zebra_debug_rib & ZEBRA_DEBUG_RIB) #define IS_ZEBRA_DEBUG_RIB_Q (zebra_debug_rib & ZEBRA_DEBUG_RIB_Q) +#define IS_ZEBRA_DEBUG_FPM (zebra_debug_fpm & ZEBRA_DEBUG_FPM) + extern unsigned long zebra_debug_event; extern unsigned long zebra_debug_packet; extern unsigned long zebra_debug_kernel; extern unsigned long zebra_debug_rib; +extern unsigned long zebra_debug_fpm; extern void zebra_debug_init (void); diff --git a/zebra/main.c b/zebra/main.c index 50ac224e..742e0292 100644 --- a/zebra/main.c +++ b/zebra/main.c @@ -39,6 +39,7 @@ #include "zebra/router-id.h" #include "zebra/irdp.h" #include "zebra/rtadv.h" +#include "zebra/zebra_fpm.h" /* Zebra instance */ struct zebra_t zebrad = @@ -349,6 +350,12 @@ main (int argc, char **argv) zebra_snmp_init (); #endif /* HAVE_SNMP */ +#ifdef HAVE_FPM + zfpm_init (zebrad.master, 1, 0); +#else + zfpm_init (zebrad.master, 0, 0); +#endif + /* Process the configuration file. Among other configuration * directives we can meet those installing static routes. Such * requests will not be executed immediately, but queued in diff --git a/zebra/misc_null.c b/zebra/misc_null.c index 73594301..c8cc47d1 100644 --- a/zebra/misc_null.c +++ b/zebra/misc_null.c @@ -4,8 +4,15 @@ #include "zebra/rtadv.h" #include "zebra/irdp.h" #include "zebra/interface.h" +#include "zebra/zebra_fpm.h" void ifstat_update_proc (void) { return; } #pragma weak rtadv_config_write = ifstat_update_proc #pragma weak irdp_config_write = ifstat_update_proc #pragma weak ifstat_update_sysctl = ifstat_update_proc + +void +zfpm_trigger_update (struct route_node *rn, const char *reason) +{ + return; +} diff --git a/zebra/rib.h b/zebra/rib.h index 4ecfaa0d..e16ce68a 100644 --- a/zebra/rib.h +++ b/zebra/rib.h @@ -25,6 +25,7 @@ #include "prefix.h" #include "table.h" +#include "queue.h" #define DISTANCE_INFINITY 255 @@ -116,6 +117,11 @@ typedef struct rib_dest_t_ */ u_int32_t flags; + /* + * Linkage to put dest on the FPM processing queue. + */ + TAILQ_ENTRY(rib_dest_t_) fpm_q_entries; + } rib_dest_t; #define RIB_ROUTE_QUEUED(x) (1 << (x)) @@ -126,6 +132,18 @@ typedef struct rib_dest_t_ #define ZEBRA_MAX_QINDEX (MQ_SIZE - 1) /* + * This flag indicates that a given prefix has been 'advertised' to + * the FPM to be installed in the forwarding plane. + */ +#define RIB_DEST_SENT_TO_FPM (1 << (ZEBRA_MAX_QINDEX + 1)) + +/* + * This flag is set when we need to send an update to the FPM about a + * dest. + */ +#define RIB_DEST_UPDATE_FPM (1 << (ZEBRA_MAX_QINDEX + 2)) + +/* * Macro to iterate over each route for a destination (prefix). */ #define RIB_DEST_FOREACH_ROUTE(dest, rib) \ diff --git a/zebra/zebra_fpm.c b/zebra/zebra_fpm.c new file mode 100644 index 00000000..e02d1745 --- /dev/null +++ b/zebra/zebra_fpm.c @@ -0,0 +1,1581 @@ +/* + * Main implementation file for interface to Forwarding Plane Manager. + * + * Copyright (C) 2012 by Open Source Routing. + * Copyright (C) 2012 by Internet Systems Consortium, Inc. ("ISC") + * + * This file is part of GNU Zebra. + * + * GNU Zebra is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * GNU Zebra is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Zebra; see the file COPYING. If not, write to the Free + * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA + * 02111-1307, USA. + */ + +#include <zebra.h> + +#include "log.h" +#include "stream.h" +#include "thread.h" +#include "network.h" +#include "command.h" + +#include "zebra/rib.h" + +#include "fpm/fpm.h" +#include "zebra_fpm.h" +#include "zebra_fpm_private.h" + +/* + * Interval at which we attempt to connect to the FPM. + */ +#define ZFPM_CONNECT_RETRY_IVL 5 + +/* + * Sizes of outgoing and incoming stream buffers for writing/reading + * FPM messages. + */ +#define ZFPM_OBUF_SIZE (2 * FPM_MAX_MSG_LEN) +#define ZFPM_IBUF_SIZE (FPM_MAX_MSG_LEN) + +/* + * The maximum number of times the FPM socket write callback can call + * 'write' before it yields. + */ +#define ZFPM_MAX_WRITES_PER_RUN 10 + +/* + * Interval over which we collect statistics. + */ +#define ZFPM_STATS_IVL_SECS 10 + +/* + * Structure that holds state for iterating over all route_node + * structures that are candidates for being communicated to the FPM. + */ +typedef struct zfpm_rnodes_iter_t_ +{ + rib_tables_iter_t tables_iter; + route_table_iter_t iter; +} zfpm_rnodes_iter_t; + +/* + * Statistics. + */ +typedef struct zfpm_stats_t_ { + unsigned long connect_calls; + unsigned long connect_no_sock; + + unsigned long read_cb_calls; + + unsigned long write_cb_calls; + unsigned long write_calls; + unsigned long partial_writes; + unsigned long max_writes_hit; + unsigned long t_write_yields; + + unsigned long nop_deletes_skipped; + unsigned long route_adds; + unsigned long route_dels; + + unsigned long updates_triggered; + unsigned long redundant_triggers; + unsigned long non_fpm_table_triggers; + + unsigned long dests_del_after_update; + + unsigned long t_conn_down_starts; + unsigned long t_conn_down_dests_processed; + unsigned long t_conn_down_yields; + unsigned long t_conn_down_finishes; + + unsigned long t_conn_up_starts; + unsigned long t_conn_up_dests_processed; + unsigned long t_conn_up_yields; + unsigned long t_conn_up_aborts; + unsigned long t_conn_up_finishes; + +} zfpm_stats_t; + +/* + * States for the FPM state machine. + */ +typedef enum { + + /* + * In this state we are not yet ready to connect to the FPM. This + * can happen when this module is disabled, or if we're cleaning up + * after a connection has gone down. + */ + ZFPM_STATE_IDLE, + + /* + * Ready to talk to the FPM and periodically trying to connect to + * it. + */ + ZFPM_STATE_ACTIVE, + + /* + * In the middle of bringing up a TCP connection. Specifically, + * waiting for a connect() call to complete asynchronously. + */ + ZFPM_STATE_CONNECTING, + + /* + * TCP connection to the FPM is up. + */ + ZFPM_STATE_ESTABLISHED + +} zfpm_state_t; + +/* + * Globals. + */ +typedef struct zfpm_glob_t_ +{ + + /* + * True if the FPM module has been enabled. + */ + int enabled; + + struct thread_master *master; + + zfpm_state_t state; + + /* + * Port on which the FPM is running. + */ + int fpm_port; + + /* + * List of rib_dest_t structures to be processed + */ + TAILQ_HEAD (zfpm_dest_q, rib_dest_t_) dest_q; + + /* + * Stream socket to the FPM. + */ + int sock; + + /* + * Buffers for messages to/from the FPM. + */ + struct stream *obuf; + struct stream *ibuf; + + /* + * Threads for I/O. + */ + struct thread *t_connect; + struct thread *t_write; + struct thread *t_read; + + /* + * Thread to clean up after the TCP connection to the FPM goes down + * and the state that belongs to it. + */ + struct thread *t_conn_down; + + struct { + zfpm_rnodes_iter_t iter; + } t_conn_down_state; + + /* + * Thread to take actions once the TCP conn to the FPM comes up, and + * the state that belongs to it. + */ + struct thread *t_conn_up; + + struct { + zfpm_rnodes_iter_t iter; + } t_conn_up_state; + + unsigned long connect_calls; + time_t last_connect_call_time; + + /* + * Stats from the start of the current statistics interval up to + * now. These are the counters we typically update in the code. + */ + zfpm_stats_t stats; + + /* + * Statistics that were gathered in the last collection interval. + */ + zfpm_stats_t last_ivl_stats; + + /* + * Cumulative stats from the last clear to the start of the current + * statistics interval. + */ + zfpm_stats_t cumulative_stats; + + /* + * Stats interval timer. + */ + struct thread *t_stats; + + /* + * If non-zero, the last time when statistics were cleared. + */ + time_t last_stats_clear_time; + +} zfpm_glob_t; + +static zfpm_glob_t zfpm_glob_space; +static zfpm_glob_t *zfpm_g = &zfpm_glob_space; + +static int zfpm_read_cb (struct thread *thread); +static int zfpm_write_cb (struct thread *thread); + +static void zfpm_set_state (zfpm_state_t state, const char *reason); +static void zfpm_start_connect_timer (const char *reason); +static void zfpm_start_stats_timer (void); + +/* + * zfpm_thread_should_yield + */ +static inline int +zfpm_thread_should_yield (struct thread *t) +{ + return thread_should_yield (t); +} + +/* + * zfpm_state_to_str + */ +static const char * +zfpm_state_to_str (zfpm_state_t state) +{ + switch (state) + { + + case ZFPM_STATE_IDLE: + return "idle"; + + case ZFPM_STATE_ACTIVE: + return "active"; + + case ZFPM_STATE_CONNECTING: + return "connecting"; + + case ZFPM_STATE_ESTABLISHED: + return "established"; + + default: + return "unknown"; + } +} + +/* + * zfpm_get_time + */ +static time_t +zfpm_get_time (void) +{ + struct timeval tv; + + if (quagga_gettime (QUAGGA_CLK_MONOTONIC, &tv) < 0) + zlog_warn ("FPM: quagga_gettime failed!!"); + + return tv.tv_sec; +} + +/* + * zfpm_get_elapsed_time + * + * Returns the time elapsed (in seconds) since the given time. + */ +static time_t +zfpm_get_elapsed_time (time_t reference) +{ + time_t now; + + now = zfpm_get_time (); + + if (now < reference) + { + assert (0); + return 0; + } + + return now - reference; +} + +/* + * zfpm_is_table_for_fpm + * + * Returns TRUE if the the given table is to be communicated to the + * FPM. + */ +static inline int +zfpm_is_table_for_fpm (struct route_table *table) +{ + rib_table_info_t *info; + + info = rib_table_info (table); + + /* + * We only send the unicast tables in the main instance to the FPM + * at this point. + */ + if (info->vrf->id != 0) + return 0; + + if (info->safi != SAFI_UNICAST) + return 0; + + return 1; +} + +/* + * zfpm_rnodes_iter_init + */ +static inline void +zfpm_rnodes_iter_init (zfpm_rnodes_iter_t *iter) +{ + memset (iter, 0, sizeof (*iter)); + rib_tables_iter_init (&iter->tables_iter); + + /* + * This is a hack, but it makes implementing 'next' easier by + * ensuring that route_table_iter_next() will return NULL the first + * time we call it. + */ + route_table_iter_init (&iter->iter, NULL); + route_table_iter_cleanup (&iter->iter); +} + +/* + * zfpm_rnodes_iter_next + */ +static inline struct route_node * +zfpm_rnodes_iter_next (zfpm_rnodes_iter_t *iter) +{ + struct route_node *rn; + struct route_table *table; + + while (1) + { + rn = route_table_iter_next (&iter->iter); + if (rn) + return rn; + + /* + * We've made our way through this table, go to the next one. + */ + route_table_iter_cleanup (&iter->iter); + + while ((table = rib_tables_iter_next (&iter->tables_iter))) + { + if (zfpm_is_table_for_fpm (table)) + break; + } + + if (!table) + return NULL; + + route_table_iter_init (&iter->iter, table); + } + + return NULL; +} + +/* + * zfpm_rnodes_iter_pause + */ +static inline void +zfpm_rnodes_iter_pause (zfpm_rnodes_iter_t *iter) +{ + route_table_iter_pause (&iter->iter); +} + +/* + * zfpm_rnodes_iter_cleanup + */ +static inline void +zfpm_rnodes_iter_cleanup (zfpm_rnodes_iter_t *iter) +{ + route_table_iter_cleanup (&iter->iter); + rib_tables_iter_cleanup (&iter->tables_iter); +} + +/* + * zfpm_stats_init + * + * Initialize a statistics block. + */ +static inline void +zfpm_stats_init (zfpm_stats_t *stats) +{ + memset (stats, 0, sizeof (*stats)); +} + +/* + * zfpm_stats_reset + */ +static inline void +zfpm_stats_reset (zfpm_stats_t *stats) +{ + zfpm_stats_init (stats); +} + +/* + * zfpm_stats_copy + */ +static inline void +zfpm_stats_copy (const zfpm_stats_t *src, zfpm_stats_t *dest) +{ + memcpy (dest, src, sizeof (*dest)); +} + +/* + * zfpm_stats_compose + * + * Total up the statistics in two stats structures ('s1 and 's2') and + * return the result in the third argument, 'result'. Note that the + * pointer 'result' may be the same as 's1' or 's2'. + * + * For simplicity, the implementation below assumes that the stats + * structure is composed entirely of counters. This can easily be + * changed when necessary. + */ +static void +zfpm_stats_compose (const zfpm_stats_t *s1, const zfpm_stats_t *s2, + zfpm_stats_t *result) +{ + const unsigned long *p1, *p2; + unsigned long *result_p; + int i, num_counters; + + p1 = (const unsigned long *) s1; + p2 = (const unsigned long *) s2; + result_p = (unsigned long *) result; + + num_counters = (sizeof (zfpm_stats_t) / sizeof (unsigned long)); + + for (i = 0; i < num_counters; i++) + { + result_p[i] = p1[i] + p2[i]; + } +} + +/* + * zfpm_read_on + */ +static inline void +zfpm_read_on (void) +{ + assert (!zfpm_g->t_read); + assert (zfpm_g->sock >= 0); + + THREAD_READ_ON (zfpm_g->master, zfpm_g->t_read, zfpm_read_cb, 0, + zfpm_g->sock); +} + +/* + * zfpm_write_on + */ +static inline void +zfpm_write_on (void) +{ + assert (!zfpm_g->t_write); + assert (zfpm_g->sock >= 0); + + THREAD_WRITE_ON (zfpm_g->master, zfpm_g->t_write, zfpm_write_cb, 0, + zfpm_g->sock); +} + +/* + * zfpm_read_off + */ +static inline void +zfpm_read_off (void) +{ + THREAD_READ_OFF (zfpm_g->t_read); +} + +/* + * zfpm_write_off + */ +static inline void +zfpm_write_off (void) +{ + THREAD_WRITE_OFF (zfpm_g->t_write); +} + +/* + * zfpm_conn_up_thread_cb + * + * Callback for actions to be taken when the connection to the FPM + * comes up. + */ +static int +zfpm_conn_up_thread_cb (struct thread *thread) +{ + struct route_node *rnode; + zfpm_rnodes_iter_t *iter; + rib_dest_t *dest; + + assert (zfpm_g->t_conn_up); + zfpm_g->t_conn_up = NULL; + + iter = &zfpm_g->t_conn_up_state.iter; + + if (zfpm_g->state != ZFPM_STATE_ESTABLISHED) + { + zfpm_debug ("Connection not up anymore, conn_up thread aborting"); + zfpm_g->stats.t_conn_up_aborts++; + goto done; + } + + while ((rnode = zfpm_rnodes_iter_next (iter))) + { + dest = rib_dest_from_rnode (rnode); + + if (dest) + { + zfpm_g->stats.t_conn_up_dests_processed++; + zfpm_trigger_update (rnode, NULL); + } + + /* + * Yield if need be. + */ + if (!zfpm_thread_should_yield (thread)) + continue; + + zfpm_g->stats.t_conn_up_yields++; + zfpm_rnodes_iter_pause (iter); + zfpm_g->t_conn_up = thread_add_background (zfpm_g->master, + zfpm_conn_up_thread_cb, + 0, 0); + return 0; + } + + zfpm_g->stats.t_conn_up_finishes++; + + done: + zfpm_rnodes_iter_cleanup (iter); + return 0; +} + +/* + * zfpm_connection_up + * + * Called when the connection to the FPM comes up. + */ +static void +zfpm_connection_up (const char *detail) +{ + assert (zfpm_g->sock >= 0); + zfpm_read_on (); + zfpm_write_on (); + zfpm_set_state (ZFPM_STATE_ESTABLISHED, detail); + + /* + * Start thread to push existing routes to the FPM. + */ + assert (!zfpm_g->t_conn_up); + + zfpm_rnodes_iter_init (&zfpm_g->t_conn_up_state.iter); + + zfpm_debug ("Starting conn_up thread"); + zfpm_g->t_conn_up = thread_add_background (zfpm_g->master, + zfpm_conn_up_thread_cb, 0, 0); + zfpm_g->stats.t_conn_up_starts++; +} + +/* + * zfpm_connect_check + * + * Check if an asynchronous connect() to the FPM is complete. + */ +static void +zfpm_connect_check () +{ + int status; + socklen_t slen; + int ret; + + zfpm_read_off (); + zfpm_write_off (); + + slen = sizeof (status); + ret = getsockopt (zfpm_g->sock, SOL_SOCKET, SO_ERROR, (void *) &status, + &slen); + + if (ret >= 0 && status == 0) + { + zfpm_connection_up ("async connect complete"); + return; + } + + /* + * getsockopt() failed or indicated an error on the socket. + */ + close (zfpm_g->sock); + zfpm_g->sock = -1; + + zfpm_start_connect_timer ("getsockopt() after async connect failed"); + return; +} + +/* + * zfpm_conn_down_thread_cb + * + * Callback that is invoked to clean up state after the TCP connection + * to the FPM goes down. + */ +static int +zfpm_conn_down_thread_cb (struct thread *thread) +{ + struct route_node *rnode; + zfpm_rnodes_iter_t *iter; + rib_dest_t *dest; + + assert (zfpm_g->state == ZFPM_STATE_IDLE); + + assert (zfpm_g->t_conn_down); + zfpm_g->t_conn_down = NULL; + + iter = &zfpm_g->t_conn_down_state.iter; + + while ((rnode = zfpm_rnodes_iter_next (iter))) + { + dest = rib_dest_from_rnode (rnode); + + if (dest) + { + if (CHECK_FLAG (dest->flags, RIB_DEST_UPDATE_FPM)) + { + TAILQ_REMOVE (&zfpm_g->dest_q, dest, fpm_q_entries); + } + + UNSET_FLAG (dest->flags, RIB_DEST_UPDATE_FPM); + UNSET_FLAG (dest->flags, RIB_DEST_SENT_TO_FPM); + + zfpm_g->stats.t_conn_down_dests_processed++; + + /* + * Check if the dest should be deleted. + */ + rib_gc_dest(rnode); + } + + /* + * Yield if need be. + */ + if (!zfpm_thread_should_yield (thread)) + continue; + + zfpm_g->stats.t_conn_down_yields++; + zfpm_rnodes_iter_pause (iter); + zfpm_g->t_conn_down = thread_add_background (zfpm_g->master, + zfpm_conn_down_thread_cb, + 0, 0); + return 0; + } + + zfpm_g->stats.t_conn_down_finishes++; + zfpm_rnodes_iter_cleanup (iter); + + /* + * Start the process of connecting to the FPM again. + */ + zfpm_start_connect_timer ("cleanup complete"); + return 0; +} + +/* + * zfpm_connection_down + * + * Called when the connection to the FPM has gone down. + */ +static void +zfpm_connection_down (const char *detail) +{ + if (!detail) + detail = "unknown"; + + assert (zfpm_g->state == ZFPM_STATE_ESTABLISHED); + + zlog_info ("connection to the FPM has gone down: %s", detail); + + zfpm_read_off (); + zfpm_write_off (); + + stream_reset (zfpm_g->ibuf); + stream_reset (zfpm_g->obuf); + + if (zfpm_g->sock >= 0) { + close (zfpm_g->sock); + zfpm_g->sock = -1; + } + + /* + * Start thread to clean up state after the connection goes down. + */ + assert (!zfpm_g->t_conn_down); + zfpm_debug ("Starting conn_down thread"); + zfpm_rnodes_iter_init (&zfpm_g->t_conn_down_state.iter); + zfpm_g->t_conn_down = thread_add_background (zfpm_g->master, + zfpm_conn_down_thread_cb, 0, 0); + zfpm_g->stats.t_conn_down_starts++; + + zfpm_set_state (ZFPM_STATE_IDLE, detail); +} + +/* + * zfpm_read_cb + */ +static int +zfpm_read_cb (struct thread *thread) +{ + size_t already; + struct stream *ibuf; + uint16_t msg_len; + fpm_msg_hdr_t *hdr; + + zfpm_g->stats.read_cb_calls++; + assert (zfpm_g->t_read); + zfpm_g->t_read = NULL; + + /* + * Check if async connect is now done. + */ + if (zfpm_g->state == ZFPM_STATE_CONNECTING) + { + zfpm_connect_check(); + return 0; + } + + assert (zfpm_g->state == ZFPM_STATE_ESTABLISHED); + assert (zfpm_g->sock >= 0); + + ibuf = zfpm_g->ibuf; + + already = stream_get_endp (ibuf); + if (already < FPM_MSG_HDR_LEN) + { + ssize_t nbyte; + + nbyte = stream_read_try (ibuf, zfpm_g->sock, FPM_MSG_HDR_LEN - already); + if (nbyte == 0 || nbyte == -1) + { + zfpm_connection_down ("closed socket in read"); + return 0; + } + + if (nbyte != (ssize_t) (FPM_MSG_HDR_LEN - already)) + goto done; + + already = FPM_MSG_HDR_LEN; + } + + stream_set_getp (ibuf, 0); + + hdr = (fpm_msg_hdr_t *) stream_pnt (ibuf); + + if (!fpm_msg_hdr_ok (hdr)) + { + zfpm_connection_down ("invalid message header"); + return 0; + } + + msg_len = fpm_msg_len (hdr); + + /* + * Read out the rest of the packet. + */ + if (already < msg_len) + { + ssize_t nbyte; + + nbyte = stream_read_try (ibuf, zfpm_g->sock, msg_len - already); + + if (nbyte == 0 || nbyte == -1) + { + zfpm_connection_down ("failed to read message"); + return 0; + } + + if (nbyte != (ssize_t) (msg_len - already)) + goto done; + } + + zfpm_debug ("Read out a full fpm message"); + + /* + * Just throw it away for now. + */ + stream_reset (ibuf); + + done: + zfpm_read_on (); + return 0; +} + +/* + * zfpm_writes_pending + * + * Returns TRUE if we may have something to write to the FPM. + */ +static int +zfpm_writes_pending (void) +{ + + /* + * Check if there is any data in the outbound buffer that has not + * been written to the socket yet. + */ + if (stream_get_endp (zfpm_g->obuf) - stream_get_getp (zfpm_g->obuf)) + return 1; + + /* + * Check if there are any prefixes on the outbound queue. + */ + if (!TAILQ_EMPTY (&zfpm_g->dest_q)) + return 1; + + return 0; +} + +/* + * zfpm_encode_route + * + * Encode a message to the FPM with information about the given route. + * + * Returns the number of bytes written to the buffer. 0 or a negative + * value indicates an error. + */ +static inline int +zfpm_encode_route (rib_dest_t *dest, struct rib *rib, char *in_buf, + size_t in_buf_len) +{ +#ifndef HAVE_NETLINK + return 0; +#else + + int cmd; + + cmd = rib ? RTM_NEWROUTE : RTM_DELROUTE; + + return zfpm_netlink_encode_route (cmd, dest, rib, in_buf, in_buf_len); + +#endif /* HAVE_NETLINK */ +} + +/* + * zfpm_route_for_update + * + * Returns the rib that is to be sent to the FPM for a given dest. + */ +static struct rib * +zfpm_route_for_update (rib_dest_t *dest) +{ + struct rib *rib; + + RIB_DEST_FOREACH_ROUTE (dest, rib) + { + if (!CHECK_FLAG (rib->flags, ZEBRA_FLAG_SELECTED)) + continue; + + return rib; + } + + /* + * We have no route for this destination. + */ + return NULL; +} + +/* + * zfpm_build_updates + * + * Process the outgoing queue and write messages to the outbound + * buffer. + */ +static void +zfpm_build_updates (void) +{ + struct stream *s; + rib_dest_t *dest; + unsigned char *buf, *data, *buf_end; + size_t msg_len; + size_t data_len; + fpm_msg_hdr_t *hdr; + struct rib *rib; + int is_add, write_msg; + + s = zfpm_g->obuf; + + assert (stream_empty (s)); + + do { + + /* + * Make sure there is enough space to write another message. + */ + if (STREAM_WRITEABLE (s) < FPM_MAX_MSG_LEN) + break; + + buf = STREAM_DATA (s) + stream_get_endp (s); + buf_end = buf + STREAM_WRITEABLE (s); + + dest = TAILQ_FIRST (&zfpm_g->dest_q); + if (!dest) + break; + + assert (CHECK_FLAG (dest->flags, RIB_DEST_UPDATE_FPM)); + + hdr = (fpm_msg_hdr_t *) buf; + hdr->version = FPM_PROTO_VERSION; + hdr->msg_type = FPM_MSG_TYPE_NETLINK; + + data = fpm_msg_data (hdr); + + rib = zfpm_route_for_update (dest); + is_add = rib ? 1 : 0; + + write_msg = 1; + + /* + * If this is a route deletion, and we have not sent the route to + * the FPM previously, skip it. + */ + if (!is_add && !CHECK_FLAG (dest->flags, RIB_DEST_SENT_TO_FPM)) + { + write_msg = 0; + zfpm_g->stats.nop_deletes_skipped++; + } + + if (write_msg) { + data_len = zfpm_encode_route (dest, rib, (char *) data, buf_end - data); + + assert (data_len); + if (data_len) + { + msg_len = fpm_data_len_to_msg_len (data_len); + hdr->msg_len = htons (msg_len); + stream_forward_endp (s, msg_len); + + if (is_add) + zfpm_g->stats.route_adds++; + else + zfpm_g->stats.route_dels++; + } + } + + /* + * Remove the dest from the queue, and reset the flag. + */ + UNSET_FLAG (dest->flags, RIB_DEST_UPDATE_FPM); + TAILQ_REMOVE (&zfpm_g->dest_q, dest, fpm_q_entries); + + if (is_add) + { + SET_FLAG (dest->flags, RIB_DEST_SENT_TO_FPM); + } + else + { + UNSET_FLAG (dest->flags, RIB_DEST_SENT_TO_FPM); + } + + /* + * Delete the destination if necessary. + */ + if (rib_gc_dest (dest->rnode)) + zfpm_g->stats.dests_del_after_update++; + + } while (1); + +} + +/* + * zfpm_write_cb + */ +static int +zfpm_write_cb (struct thread *thread) +{ + struct stream *s; + int num_writes; + + zfpm_g->stats.write_cb_calls++; + assert (zfpm_g->t_write); + zfpm_g->t_write = NULL; + + /* + * Check if async connect is now done. + */ + if (zfpm_g->state == ZFPM_STATE_CONNECTING) + { + zfpm_connect_check (); + return 0; + } + + assert (zfpm_g->state == ZFPM_STATE_ESTABLISHED); + assert (zfpm_g->sock >= 0); + + num_writes = 0; + + do + { + int bytes_to_write, bytes_written; + + s = zfpm_g->obuf; + + /* + * If the stream is empty, try fill it up with data. + */ + if (stream_empty (s)) + { + zfpm_build_updates (); + } + + bytes_to_write = stream_get_endp (s) - stream_get_getp (s); + if (!bytes_to_write) + break; + + bytes_written = write (zfpm_g->sock, STREAM_PNT (s), bytes_to_write); + zfpm_g->stats.write_calls++; + num_writes++; + + if (bytes_written < 0) + { + if (ERRNO_IO_RETRY (errno)) + break; + + zfpm_connection_down ("failed to write to socket"); + return 0; + } + + if (bytes_written != bytes_to_write) + { + + /* + * Partial write. + */ + stream_forward_getp (s, bytes_written); + zfpm_g->stats.partial_writes++; + break; + } + + /* + * We've written out the entire contents of the stream. + */ + stream_reset (s); + + if (num_writes >= ZFPM_MAX_WRITES_PER_RUN) + { + zfpm_g->stats.max_writes_hit++; + break; + } + + if (zfpm_thread_should_yield (thread)) + { + zfpm_g->stats.t_write_yields++; + break; + } + } while (1); + + if (zfpm_writes_pending ()) + zfpm_write_on (); + + return 0; +} + +/* + * zfpm_connect_cb + */ +static int +zfpm_connect_cb (struct thread *t) +{ + int sock, ret; + struct sockaddr_in serv; + + assert (zfpm_g->t_connect); + zfpm_g->t_connect = NULL; + assert (zfpm_g->state == ZFPM_STATE_ACTIVE); + + sock = socket (AF_INET, SOCK_STREAM, 0); + if (sock < 0) + { + zfpm_debug ("Failed to create socket for connect(): %s", strerror(errno)); + zfpm_g->stats.connect_no_sock++; + return 0; + } + + set_nonblocking(sock); + + /* Make server socket. */ + memset (&serv, 0, sizeof (serv)); + serv.sin_family = AF_INET; + serv.sin_port = htons (zfpm_g->fpm_port); +#ifdef HAVE_STRUCT_SOCKADDR_IN_SIN_LEN + serv.sin_len = sizeof (struct sockaddr_in); +#endif /* HAVE_STRUCT_SOCKADDR_IN_SIN_LEN */ + serv.sin_addr.s_addr = htonl (INADDR_LOOPBACK); + + /* + * Connect to the FPM. + */ + zfpm_g->connect_calls++; + zfpm_g->stats.connect_calls++; + zfpm_g->last_connect_call_time = zfpm_get_time (); + + ret = connect (sock, (struct sockaddr *) &serv, sizeof (serv)); + if (ret >= 0) + { + zfpm_g->sock = sock; + zfpm_connection_up ("connect succeeded"); + return 1; + } + + if (errno == EINPROGRESS) + { + zfpm_g->sock = sock; + zfpm_read_on (); + zfpm_write_on (); + zfpm_set_state (ZFPM_STATE_CONNECTING, "async connect in progress"); + return 0; + } + + zlog_info ("can't connect to FPM %d: %s", sock, safe_strerror (errno)); + close (sock); + + /* + * Restart timer for retrying connection. + */ + zfpm_start_connect_timer ("connect() failed"); + return 0; +} + +/* + * zfpm_set_state + * + * Move state machine into the given state. + */ +static void +zfpm_set_state (zfpm_state_t state, const char *reason) +{ + zfpm_state_t cur_state = zfpm_g->state; + + if (!reason) + reason = "Unknown"; + + if (state == cur_state) + return; + + zfpm_debug("beginning state transition %s -> %s. Reason: %s", + zfpm_state_to_str (cur_state), zfpm_state_to_str (state), + reason); + + switch (state) { + + case ZFPM_STATE_IDLE: + assert (cur_state == ZFPM_STATE_ESTABLISHED); + break; + + case ZFPM_STATE_ACTIVE: + assert (cur_state == ZFPM_STATE_IDLE || + cur_state == ZFPM_STATE_CONNECTING); + assert (zfpm_g->t_connect); + break; + + case ZFPM_STATE_CONNECTING: + assert (zfpm_g->sock); + assert (cur_state == ZFPM_STATE_ACTIVE); + assert (zfpm_g->t_read); + assert (zfpm_g->t_write); + break; + + case ZFPM_STATE_ESTABLISHED: + assert (cur_state == ZFPM_STATE_ACTIVE || + cur_state == ZFPM_STATE_CONNECTING); + assert (zfpm_g->sock); + assert (zfpm_g->t_read); + assert (zfpm_g->t_write); + break; + } + + zfpm_g->state = state; +} + +/* + * zfpm_calc_connect_delay + * + * Returns the number of seconds after which we should attempt to + * reconnect to the FPM. + */ +static long +zfpm_calc_connect_delay (void) +{ + time_t elapsed; + + /* + * Return 0 if this is our first attempt to connect. + */ + if (zfpm_g->connect_calls == 0) + { + return 0; + } + + elapsed = zfpm_get_elapsed_time (zfpm_g->last_connect_call_time); + + if (elapsed > ZFPM_CONNECT_RETRY_IVL) { + return 0; + } + + return ZFPM_CONNECT_RETRY_IVL - elapsed; +} + +/* + * zfpm_start_connect_timer + */ +static void +zfpm_start_connect_timer (const char *reason) +{ + long delay_secs; + + assert (!zfpm_g->t_connect); + assert (zfpm_g->sock < 0); + + assert(zfpm_g->state == ZFPM_STATE_IDLE || + zfpm_g->state == ZFPM_STATE_ACTIVE || + zfpm_g->state == ZFPM_STATE_CONNECTING); + + delay_secs = zfpm_calc_connect_delay(); + zfpm_debug ("scheduling connect in %ld seconds", delay_secs); + + THREAD_TIMER_ON (zfpm_g->master, zfpm_g->t_connect, zfpm_connect_cb, 0, + delay_secs); + zfpm_set_state (ZFPM_STATE_ACTIVE, reason); +} + +/* + * zfpm_is_enabled + * + * Returns TRUE if the zebra FPM module has been enabled. + */ +static inline int +zfpm_is_enabled (void) +{ + return zfpm_g->enabled; +} + +/* + * zfpm_conn_is_up + * + * Returns TRUE if the connection to the FPM is up. + */ +static inline int +zfpm_conn_is_up (void) +{ + if (zfpm_g->state != ZFPM_STATE_ESTABLISHED) + return 0; + + assert (zfpm_g->sock >= 0); + + return 1; +} + +/* + * zfpm_trigger_update + * + * The zebra code invokes this function to indicate that we should + * send an update to the FPM about the given route_node. + */ +void +zfpm_trigger_update (struct route_node *rn, const char *reason) +{ + rib_dest_t *dest; + char buf[INET6_ADDRSTRLEN]; + + /* + * Ignore if the connection is down. We will update the FPM about + * all destinations once the connection comes up. + */ + if (!zfpm_conn_is_up ()) + return; + + dest = rib_dest_from_rnode (rn); + + /* + * Ignore the trigger if the dest is not in a table that we would + * send to the FPM. + */ + if (!zfpm_is_table_for_fpm (rib_dest_table (dest))) + { + zfpm_g->stats.non_fpm_table_triggers++; + return; + } + + if (CHECK_FLAG (dest->flags, RIB_DEST_UPDATE_FPM)) { + zfpm_g->stats.redundant_triggers++; + return; + } + + if (reason) + { + zfpm_debug ("%s/%d triggering update to FPM - Reason: %s", + inet_ntop (rn->p.family, &rn->p.u.prefix, buf, sizeof (buf)), + rn->p.prefixlen, reason); + } + + SET_FLAG (dest->flags, RIB_DEST_UPDATE_FPM); + TAILQ_INSERT_TAIL (&zfpm_g->dest_q, dest, fpm_q_entries); + zfpm_g->stats.updates_triggered++; + + /* + * Make sure that writes are enabled. + */ + if (zfpm_g->t_write) + return; + + zfpm_write_on (); +} + +/* + * zfpm_stats_timer_cb + */ +static int +zfpm_stats_timer_cb (struct thread *t) +{ + assert (zfpm_g->t_stats); + zfpm_g->t_stats = NULL; + + /* + * Remember the stats collected in the last interval for display + * purposes. + */ + zfpm_stats_copy (&zfpm_g->stats, &zfpm_g->last_ivl_stats); + + /* + * Add the current set of stats into the cumulative statistics. + */ + zfpm_stats_compose (&zfpm_g->cumulative_stats, &zfpm_g->stats, + &zfpm_g->cumulative_stats); + + /* + * Start collecting stats afresh over the next interval. + */ + zfpm_stats_reset (&zfpm_g->stats); + + zfpm_start_stats_timer (); + + return 0; +} + +/* + * zfpm_stop_stats_timer + */ +static void +zfpm_stop_stats_timer (void) +{ + if (!zfpm_g->t_stats) + return; + + zfpm_debug ("Stopping existing stats timer"); + THREAD_TIMER_OFF (zfpm_g->t_stats); +} + +/* + * zfpm_start_stats_timer + */ +void +zfpm_start_stats_timer (void) +{ + assert (!zfpm_g->t_stats); + + THREAD_TIMER_ON (zfpm_g->master, zfpm_g->t_stats, zfpm_stats_timer_cb, 0, + ZFPM_STATS_IVL_SECS); +} + +/* + * Helper macro for zfpm_show_stats() below. + */ +#define ZFPM_SHOW_STAT(counter) \ + do { \ + vty_out (vty, "%-40s %10lu %16lu%s", #counter, total_stats.counter, \ + zfpm_g->last_ivl_stats.counter, VTY_NEWLINE); \ + } while (0) + +/* + * zfpm_show_stats + */ +static void +zfpm_show_stats (struct vty *vty) +{ + zfpm_stats_t total_stats; + time_t elapsed; + + vty_out (vty, "%s%-40s %10s Last %2d secs%s%s", VTY_NEWLINE, "Counter", + "Total", ZFPM_STATS_IVL_SECS, VTY_NEWLINE, VTY_NEWLINE); + + /* + * Compute the total stats up to this instant. + */ + zfpm_stats_compose (&zfpm_g->cumulative_stats, &zfpm_g->stats, + &total_stats); + + ZFPM_SHOW_STAT (connect_calls); + ZFPM_SHOW_STAT (connect_no_sock); + ZFPM_SHOW_STAT (read_cb_calls); + ZFPM_SHOW_STAT (write_cb_calls); + ZFPM_SHOW_STAT (write_calls); + ZFPM_SHOW_STAT (partial_writes); + ZFPM_SHOW_STAT (max_writes_hit); + ZFPM_SHOW_STAT (t_write_yields); + ZFPM_SHOW_STAT (nop_deletes_skipped); + ZFPM_SHOW_STAT (route_adds); + ZFPM_SHOW_STAT (route_dels); + ZFPM_SHOW_STAT (updates_triggered); + ZFPM_SHOW_STAT (non_fpm_table_triggers); + ZFPM_SHOW_STAT (redundant_triggers); + ZFPM_SHOW_STAT (dests_del_after_update); + ZFPM_SHOW_STAT (t_conn_down_starts); + ZFPM_SHOW_STAT (t_conn_down_dests_processed); + ZFPM_SHOW_STAT (t_conn_down_yields); + ZFPM_SHOW_STAT (t_conn_down_finishes); + ZFPM_SHOW_STAT (t_conn_up_starts); + ZFPM_SHOW_STAT (t_conn_up_dests_processed); + ZFPM_SHOW_STAT (t_conn_up_yields); + ZFPM_SHOW_STAT (t_conn_up_aborts); + ZFPM_SHOW_STAT (t_conn_up_finishes); + + if (!zfpm_g->last_stats_clear_time) + return; + + elapsed = zfpm_get_elapsed_time (zfpm_g->last_stats_clear_time); + + vty_out (vty, "%sStats were cleared %lu seconds ago%s", VTY_NEWLINE, + (unsigned long) elapsed, VTY_NEWLINE); +} + +/* + * zfpm_clear_stats + */ +static void +zfpm_clear_stats (struct vty *vty) +{ + if (!zfpm_is_enabled ()) + { + vty_out (vty, "The FPM module is not enabled...%s", VTY_NEWLINE); + return; + } + + zfpm_stats_reset (&zfpm_g->stats); + zfpm_stats_reset (&zfpm_g->last_ivl_stats); + zfpm_stats_reset (&zfpm_g->cumulative_stats); + + zfpm_stop_stats_timer (); + zfpm_start_stats_timer (); + + zfpm_g->last_stats_clear_time = zfpm_get_time(); + + vty_out (vty, "Cleared FPM stats%s", VTY_NEWLINE); +} + +/* + * show_zebra_fpm_stats + */ +DEFUN (show_zebra_fpm_stats, + show_zebra_fpm_stats_cmd, + "show zebra fpm stats", + SHOW_STR + "Zebra information\n" + "Forwarding Path Manager information\n" + "Statistics\n") +{ + zfpm_show_stats (vty); + return CMD_SUCCESS; +} + +/* + * clear_zebra_fpm_stats + */ +DEFUN (clear_zebra_fpm_stats, + clear_zebra_fpm_stats_cmd, + "clear zebra fpm stats", + CLEAR_STR + "Zebra information\n" + "Clear Forwarding Path Manager information\n" + "Statistics\n") +{ + zfpm_clear_stats (vty); + return CMD_SUCCESS; +} + +/** + * zfpm_init + * + * One-time initialization of the Zebra FPM module. + * + * @param[in] port port at which FPM is running. + * @param[in] enable TRUE if the zebra FPM module should be enabled + * + * Returns TRUE on success. + */ +int +zfpm_init (struct thread_master *master, int enable, uint16_t port) +{ + static int initialized = 0; + + if (initialized) { + return 1; + } + + initialized = 1; + + memset (zfpm_g, 0, sizeof (*zfpm_g)); + zfpm_g->master = master; + TAILQ_INIT(&zfpm_g->dest_q); + zfpm_g->sock = -1; + zfpm_g->state = ZFPM_STATE_IDLE; + + /* + * Netlink must currently be available for the Zebra-FPM interface + * to be enabled. + */ +#ifndef HAVE_NETLINK + enable = 0; +#endif + + zfpm_g->enabled = enable; + + zfpm_stats_init (&zfpm_g->stats); + zfpm_stats_init (&zfpm_g->last_ivl_stats); + zfpm_stats_init (&zfpm_g->cumulative_stats); + + install_element (ENABLE_NODE, &show_zebra_fpm_stats_cmd); + install_element (ENABLE_NODE, &clear_zebra_fpm_stats_cmd); + + if (!enable) { + return 1; + } + + if (!port) + port = FPM_DEFAULT_PORT; + + zfpm_g->fpm_port = port; + + zfpm_g->obuf = stream_new (ZFPM_OBUF_SIZE); + zfpm_g->ibuf = stream_new (ZFPM_IBUF_SIZE); + + zfpm_start_stats_timer (); + zfpm_start_connect_timer ("initialized"); + + return 1; +} diff --git a/zebra/zebra_fpm.h b/zebra/zebra_fpm.h new file mode 100644 index 00000000..44dec028 --- /dev/null +++ b/zebra/zebra_fpm.h @@ -0,0 +1,34 @@ +/* + * Header file exported by the zebra FPM module to zebra. + * + * Copyright (C) 2012 by Open Source Routing. + * Copyright (C) 2012 by Internet Systems Consortium, Inc. ("ISC") + * + * This file is part of GNU Zebra. + * + * GNU Zebra is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * GNU Zebra is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Zebra; see the file COPYING. If not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. + */ + +#ifndef _ZEBRA_FPM_H +#define _ZEBRA_FPM_H + +/* + * Externs. + */ +extern int zfpm_init (struct thread_master *master, int enable, uint16_t port); +extern void zfpm_trigger_update (struct route_node *rn, const char *reason); + +#endif /* _ZEBRA_FPM_H */ diff --git a/zebra/zebra_fpm_netlink.c b/zebra/zebra_fpm_netlink.c new file mode 100644 index 00000000..90d3afb2 --- /dev/null +++ b/zebra/zebra_fpm_netlink.c @@ -0,0 +1,552 @@ +/* + * Code for encoding/decoding FPM messages that are in netlink format. + * + * Copyright (C) 1997, 98, 99 Kunihiro Ishiguro + * Copyright (C) 2012 by Open Source Routing. + * Copyright (C) 2012 by Internet Systems Consortium, Inc. ("ISC") + * + * This file is part of GNU Zebra. + * + * GNU Zebra is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * GNU Zebra is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Zebra; see the file COPYING. If not, write to the Free + * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA + * 02111-1307, USA. + */ + +#include <zebra.h> + +#include "log.h" +#include "rib.h" + +#include "rt_netlink.h" + +#include "zebra_fpm_private.h" + +/* + * addr_to_a + * + * Returns string representation of an address of the given AF. + */ +static inline const char * +addr_to_a (u_char af, void *addr) +{ + if (!addr) + return "<No address>"; + + switch (af) + { + + case AF_INET: + return inet_ntoa (*((struct in_addr *) addr)); + +#ifdef HAVE_IPV6 + case AF_INET6: + return inet6_ntoa (*((struct in6_addr *) addr)); +#endif + + default: + return "<Addr in unknown AF>"; + } +} + +/* + * prefix_addr_to_a + * + * Convience wrapper that returns a human-readable string for the + * address in a prefix. + */ +static const char * +prefix_addr_to_a (struct prefix *prefix) +{ + if (!prefix) + return "<No address>"; + + return addr_to_a (prefix->family, &prefix->u.prefix); +} + +/* + * af_addr_size + * + * The size of an address in a given address family. + */ +static size_t +af_addr_size (u_char af) +{ + switch (af) + { + + case AF_INET: + return 4; + +#ifdef HAVE_IPV6 + case AF_INET6: + return 16; +#endif + + default: + assert(0); + return 16; + } +} + +/* + * netlink_nh_info_t + * + * Holds information about a single nexthop for netlink. These info + * structures are transient and may contain pointers into rib + * data structures for convenience. + */ +typedef struct netlink_nh_info_t_ +{ + uint32_t if_index; + union g_addr *gateway; + + /* + * Information from the struct nexthop from which this nh was + * derived. For debug purposes only. + */ + int recursive; + enum nexthop_types_t type; +} netlink_nh_info_t; + +/* + * netlink_route_info_t + * + * A structure for holding information for a netlink route message. + */ +typedef struct netlink_route_info_t_ +{ + uint16_t nlmsg_type; + u_char rtm_type; + uint32_t rtm_table; + u_char rtm_protocol; + u_char af; + struct prefix *prefix; + uint32_t *metric; + int num_nhs; + + /* + * Nexthop structures. We keep things simple for now by enforcing a + * maximum of 64 in case MULTIPATH_NUM is 0; + */ + netlink_nh_info_t nhs[MAX (MULTIPATH_NUM, 64)]; + union g_addr *pref_src; +} netlink_route_info_t; + +/* + * netlink_route_info_add_nh + * + * Add information about the given nexthop to the given route info + * structure. + * + * Returns TRUE if a nexthop was added, FALSE otherwise. + */ +static int +netlink_route_info_add_nh (netlink_route_info_t *ri, struct nexthop *nexthop) +{ + netlink_nh_info_t nhi; + union g_addr *src; + + memset (&nhi, 0, sizeof (nhi)); + src = NULL; + + if (ri->num_nhs >= (int) ZEBRA_NUM_OF (ri->nhs)) + return 0; + + if (CHECK_FLAG (nexthop->flags, NEXTHOP_FLAG_RECURSIVE)) + { + nhi.recursive = 1; + nhi.type = nexthop->rtype; + + if (nexthop->rtype == NEXTHOP_TYPE_IPV4 + || nexthop->rtype == NEXTHOP_TYPE_IPV4_IFINDEX) + { + nhi.gateway = &nexthop->rgate; + if (nexthop->src.ipv4.s_addr) + src = &nexthop->src; + } + +#ifdef HAVE_IPV6 + if (nexthop->rtype == NEXTHOP_TYPE_IPV6 + || nexthop->rtype == NEXTHOP_TYPE_IPV6_IFINDEX + || nexthop->rtype == NEXTHOP_TYPE_IPV6_IFNAME) + { + nhi.gateway = &nexthop->rgate; + } +#endif /* HAVE_IPV6 */ + + if (nexthop->rtype == NEXTHOP_TYPE_IFINDEX + || nexthop->rtype == NEXTHOP_TYPE_IFNAME + || nexthop->rtype == NEXTHOP_TYPE_IPV4_IFINDEX + || nexthop->rtype == NEXTHOP_TYPE_IPV6_IFINDEX + || nexthop->rtype == NEXTHOP_TYPE_IPV6_IFNAME) + { + nhi.if_index = nexthop->rifindex; + if ((nexthop->rtype == NEXTHOP_TYPE_IPV4_IFINDEX + || nexthop->rtype == NEXTHOP_TYPE_IFINDEX) + && nexthop->src.ipv4.s_addr) + src = &nexthop->src; + } + + goto done; + } + + nhi.recursive = 0; + nhi.type = nexthop->type; + + if (nexthop->type == NEXTHOP_TYPE_IPV4 + || nexthop->type == NEXTHOP_TYPE_IPV4_IFINDEX) + { + nhi.gateway = &nexthop->gate; + if (nexthop->src.ipv4.s_addr) + src = &nexthop->src; + } + +#ifdef HAVE_IPV6 + if (nexthop->type == NEXTHOP_TYPE_IPV6 + || nexthop->type == NEXTHOP_TYPE_IPV6_IFNAME + || nexthop->type == NEXTHOP_TYPE_IPV6_IFINDEX) + { + nhi.gateway = &nexthop->gate; + } +#endif /* HAVE_IPV6 */ + if (nexthop->type == NEXTHOP_TYPE_IFINDEX + || nexthop->type == NEXTHOP_TYPE_IFNAME + || nexthop->type == NEXTHOP_TYPE_IPV4_IFINDEX) + { + nhi.if_index = nexthop->ifindex; + + if (nexthop->src.ipv4.s_addr) + src = &nexthop->src; + } + else if (nexthop->type == NEXTHOP_TYPE_IPV6_IFINDEX + || nexthop->type == NEXTHOP_TYPE_IPV6_IFNAME) + { + nhi.if_index = nexthop->ifindex; + } + + /* + * Fall through... + */ + + done: + if (!nhi.gateway && nhi.if_index == 0) + return 0; + + /* + * We have a valid nhi. Copy the structure over to the route_info. + */ + ri->nhs[ri->num_nhs] = nhi; + ri->num_nhs++; + + if (src && !ri->pref_src) + ri->pref_src = src; + + return 1; +} + +/* + * netlink_proto_from_route_type + */ +static u_char +netlink_proto_from_route_type (int type) +{ + switch (type) + { + case ZEBRA_ROUTE_KERNEL: + case ZEBRA_ROUTE_CONNECT: + return RTPROT_KERNEL; + + default: + return RTPROT_ZEBRA; + } +} + +/* + * netlink_route_info_fill + * + * Fill out the route information object from the given route. + * + * Returns TRUE on success and FALSE on failure. + */ +static int +netlink_route_info_fill (netlink_route_info_t *ri, int cmd, + rib_dest_t *dest, struct rib *rib) +{ + struct nexthop *nexthop = NULL; + int discard; + + memset (ri, 0, sizeof (*ri)); + + ri->prefix = rib_dest_prefix (dest); + ri->af = rib_dest_af (dest); + + ri->nlmsg_type = cmd; + ri->rtm_table = rib_dest_vrf (dest)->id; + ri->rtm_protocol = RTPROT_UNSPEC; + + /* + * An RTM_DELROUTE need not be accompanied by any nexthops, + * particularly in our communication with the FPM. + */ + if (cmd == RTM_DELROUTE && !rib) + goto skip; + + if (rib) + ri->rtm_protocol = netlink_proto_from_route_type (rib->type); + + if ((rib->flags & ZEBRA_FLAG_BLACKHOLE) || (rib->flags & ZEBRA_FLAG_REJECT)) + discard = 1; + else + discard = 0; + + if (cmd == RTM_NEWROUTE) + { + if (discard) + { + if (rib->flags & ZEBRA_FLAG_BLACKHOLE) + ri->rtm_type = RTN_BLACKHOLE; + else if (rib->flags & ZEBRA_FLAG_REJECT) + ri->rtm_type = RTN_UNREACHABLE; + else + assert (0); + } + else + ri->rtm_type = RTN_UNICAST; + } + + ri->metric = &rib->metric; + + if (discard) + { + goto skip; + } + + /* Multipath case. */ + if (rib->nexthop_active_num == 1 || MULTIPATH_NUM == 1) + { + for (nexthop = rib->nexthop; nexthop; nexthop = nexthop->next) + { + + if ((cmd == RTM_NEWROUTE + && CHECK_FLAG (nexthop->flags, NEXTHOP_FLAG_ACTIVE)) + || (cmd == RTM_DELROUTE + && CHECK_FLAG (nexthop->flags, NEXTHOP_FLAG_FIB))) + { + netlink_route_info_add_nh (ri, nexthop); + break; + } + } + } + else + { + for (nexthop = rib->nexthop; + nexthop && (MULTIPATH_NUM == 0 || ri->num_nhs < MULTIPATH_NUM); + nexthop = nexthop->next) + { + if ((cmd == RTM_NEWROUTE + && CHECK_FLAG (nexthop->flags, NEXTHOP_FLAG_ACTIVE)) + || (cmd == RTM_DELROUTE + && CHECK_FLAG (nexthop->flags, NEXTHOP_FLAG_FIB))) + { + netlink_route_info_add_nh (ri, nexthop); + } + } + } + + /* If there is no useful nexthop then return. */ + if (ri->num_nhs == 0) + { + zfpm_debug ("netlink_encode_route(): No useful nexthop."); + return 0; + } + + skip: + return 1; +} + +/* + * netlink_route_info_encode + * + * Returns the number of bytes written to the buffer. 0 or a negative + * value indicates an error. + */ +static int +netlink_route_info_encode (netlink_route_info_t *ri, char *in_buf, + size_t in_buf_len) +{ + int bytelen; + int nexthop_num = 0; + size_t buf_offset; + netlink_nh_info_t *nhi; + + struct + { + struct nlmsghdr n; + struct rtmsg r; + char buf[1]; + } *req; + + req = (void *) in_buf; + + buf_offset = ((char *) req->buf) - ((char *) req); + + if (in_buf_len < buf_offset) { + assert(0); + return 0; + } + + memset (req, 0, buf_offset); + + bytelen = af_addr_size (ri->af); + + req->n.nlmsg_len = NLMSG_LENGTH (sizeof (struct rtmsg)); + req->n.nlmsg_flags = NLM_F_CREATE | NLM_F_REQUEST; + req->n.nlmsg_type = ri->nlmsg_type; + req->r.rtm_family = ri->af; + req->r.rtm_table = ri->rtm_table; + req->r.rtm_dst_len = ri->prefix->prefixlen; + req->r.rtm_protocol = ri->rtm_protocol; + req->r.rtm_scope = RT_SCOPE_UNIVERSE; + + addattr_l (&req->n, in_buf_len, RTA_DST, &ri->prefix->u.prefix, bytelen); + + req->r.rtm_type = ri->rtm_type; + + /* Metric. */ + if (ri->metric) + addattr32 (&req->n, in_buf_len, RTA_PRIORITY, *ri->metric); + + if (ri->num_nhs == 0) + goto done; + + if (ri->num_nhs == 1) + { + nhi = &ri->nhs[0]; + + if (nhi->gateway) + { + addattr_l (&req->n, in_buf_len, RTA_GATEWAY, nhi->gateway, + bytelen); + } + + if (nhi->if_index) + { + addattr32 (&req->n, in_buf_len, RTA_OIF, nhi->if_index); + } + + goto done; + + } + + /* + * Multipath case. + */ + char buf[NL_PKT_BUF_SIZE]; + struct rtattr *rta = (void *) buf; + struct rtnexthop *rtnh; + + rta->rta_type = RTA_MULTIPATH; + rta->rta_len = RTA_LENGTH (0); + rtnh = RTA_DATA (rta); + + for (nexthop_num = 0; nexthop_num < ri->num_nhs; nexthop_num++) + { + nhi = &ri->nhs[nexthop_num]; + + rtnh->rtnh_len = sizeof (*rtnh); + rtnh->rtnh_flags = 0; + rtnh->rtnh_hops = 0; + rtnh->rtnh_ifindex = 0; + rta->rta_len += rtnh->rtnh_len; + + if (nhi->gateway) + { + rta_addattr_l (rta, sizeof (buf), RTA_GATEWAY, nhi->gateway, bytelen); + rtnh->rtnh_len += sizeof (struct rtattr) + bytelen; + } + + if (nhi->if_index) + { + rtnh->rtnh_ifindex = nhi->if_index; + } + + rtnh = RTNH_NEXT (rtnh); + } + + assert (rta->rta_len > RTA_LENGTH (0)); + addattr_l (&req->n, in_buf_len, RTA_MULTIPATH, RTA_DATA (rta), + RTA_PAYLOAD (rta)); + +done: + + if (ri->pref_src) + { + addattr_l (&req->n, in_buf_len, RTA_PREFSRC, &ri->pref_src, bytelen); + } + + assert (req->n.nlmsg_len < in_buf_len); + return req->n.nlmsg_len; +} + +/* + * zfpm_log_route_info + * + * Helper function to log the information in a route_info structure. + */ +static void +zfpm_log_route_info (netlink_route_info_t *ri, const char *label) +{ + netlink_nh_info_t *nhi; + int i; + + zfpm_debug ("%s : %s %s/%d, Proto: %s, Metric: %u", label, + nl_msg_type_to_str (ri->nlmsg_type), + prefix_addr_to_a (ri->prefix), ri->prefix->prefixlen, + nl_rtproto_to_str (ri->rtm_protocol), + ri->metric ? *ri->metric : 0); + + for (i = 0; i < ri->num_nhs; i++) + { + nhi = &ri->nhs[i]; + zfpm_debug(" Intf: %u, Gateway: %s, Recursive: %s, Type: %s", + nhi->if_index, addr_to_a (ri->af, nhi->gateway), + nhi->recursive ? "yes" : "no", + nexthop_type_to_str (nhi->type)); + } +} + +/* + * zfpm_netlink_encode_route + * + * Create a netlink message corresponding to the given route in the + * given buffer space. + * + * Returns the number of bytes written to the buffer. 0 or a negative + * value indicates an error. + */ +int +zfpm_netlink_encode_route (int cmd, rib_dest_t *dest, struct rib *rib, + char *in_buf, size_t in_buf_len) +{ + netlink_route_info_t ri_space, *ri; + + ri = &ri_space; + + if (!netlink_route_info_fill (ri, cmd, dest, rib)) + return 0; + + zfpm_log_route_info (ri, __FUNCTION__); + + return netlink_route_info_encode (ri, in_buf, in_buf_len); +} diff --git a/zebra/zebra_fpm_private.h b/zebra/zebra_fpm_private.h new file mode 100644 index 00000000..809a70a4 --- /dev/null +++ b/zebra/zebra_fpm_private.h @@ -0,0 +1,56 @@ +/* + * Private header file for the zebra FPM module. + * + * Copyright (C) 2012 by Open Source Routing. + * Copyright (C) 2012 by Internet Systems Consortium, Inc. ("ISC") + * + * This file is part of GNU Zebra. + * + * GNU Zebra is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * GNU Zebra is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Zebra; see the file COPYING. If not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. + */ + +#ifndef _ZEBRA_FPM_PRIVATE_H +#define _ZEBRA_FPM_PRIVATE_H + +#include "zebra/debug.h" + +#if defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L + +#define zfpm_debug(...) \ + do { \ + if (IS_ZEBRA_DEBUG_FPM) zlog_debug("FPM: " __VA_ARGS__); \ + } while(0) + +#elif defined __GNUC__ + +#define zfpm_debug(_args...) \ + do { \ + if (IS_ZEBRA_DEBUG_FPM) zlog_debug("FPM: " _args); \ + } while(0) + +#else +static inline void zfpm_debug(const char *format, ...) { return; } +#endif + + +/* + * Externs + */ +extern int +zfpm_netlink_encode_route (int cmd, rib_dest_t *dest, struct rib *rib, + char *in_buf, size_t in_buf_len); + +#endif /* _ZEBRA_FPM_PRIVATE_H */ diff --git a/zebra/zebra_rib.c b/zebra/zebra_rib.c index 29977123..a75d7215 100644 --- a/zebra/zebra_rib.c +++ b/zebra/zebra_rib.c @@ -40,6 +40,7 @@ #include "zebra/zserv.h" #include "zebra/redistribute.h" #include "zebra/debug.h" +#include "zebra/zebra_fpm.h" /* Default rtm_table for all clients */ extern struct zebra_t zebrad; @@ -961,6 +962,11 @@ rib_install_kernel (struct route_node *rn, struct rib *rib) int ret = 0; struct nexthop *nexthop; + /* + * Make sure we update the FPM any time we send new information to + * the kernel. + */ + zfpm_trigger_update (rn, "installing in kernel"); switch (PREFIX_FAMILY (&rn->p)) { case AF_INET: @@ -988,6 +994,12 @@ rib_uninstall_kernel (struct route_node *rn, struct rib *rib) int ret = 0; struct nexthop *nexthop; + /* + * Make sure we update the FPM any time we send new information to + * the kernel. + */ + zfpm_trigger_update (rn, "uninstalling from kernel"); + switch (PREFIX_FAMILY (&rn->p)) { case AF_INET: @@ -1012,6 +1024,8 @@ rib_uninstall (struct route_node *rn, struct rib *rib) { if (CHECK_FLAG (rib->flags, ZEBRA_FLAG_SELECTED)) { + zfpm_trigger_update (rn, "rib_uninstall"); + redistribute_delete (&rn->p, rib); if (! RIB_SYSTEM_ROUTE (rib)) rib_uninstall_kernel (rn, rib); @@ -1034,6 +1048,14 @@ rib_can_delete_dest (rib_dest_t *dest) return 0; } + /* + * Don't delete the dest if we have to update the FPM about this + * prefix. + */ + if (CHECK_FLAG (dest->flags, RIB_DEST_UPDATE_FPM) || + CHECK_FLAG (dest->flags, RIB_DEST_SENT_TO_FPM)) + return 0; + return 1; } @@ -1187,6 +1209,8 @@ rib_process (struct route_node *rn) __func__, buf, rn->p.prefixlen, select, fib); if (CHECK_FLAG (select->flags, ZEBRA_FLAG_CHANGED)) { + zfpm_trigger_update (rn, "updating existing route"); + redistribute_delete (&rn->p, select); if (! RIB_SYSTEM_ROUTE (select)) rib_uninstall_kernel (rn, select); @@ -1228,6 +1252,9 @@ rib_process (struct route_node *rn) if (IS_ZEBRA_DEBUG_RIB) zlog_debug ("%s: %s/%d: Removing existing route, fib %p", __func__, buf, rn->p.prefixlen, fib); + + zfpm_trigger_update (rn, "removing existing route"); + redistribute_delete (&rn->p, fib); if (! RIB_SYSTEM_ROUTE (fib)) rib_uninstall_kernel (rn, fib); @@ -1246,6 +1273,9 @@ rib_process (struct route_node *rn) if (IS_ZEBRA_DEBUG_RIB) zlog_debug ("%s: %s/%d: Adding route, select %p", __func__, buf, rn->p.prefixlen, select); + + zfpm_trigger_update (rn, "new route selected"); + /* Set real nexthop. */ nexthop_active_update (rn, select, 1); @@ -3081,6 +3111,8 @@ rib_close_table (struct route_table *table) if (!CHECK_FLAG (rib->flags, ZEBRA_FLAG_SELECTED)) continue; + zfpm_trigger_update (rn, NULL); + if (! RIB_SYSTEM_ROUTE (rib)) rib_uninstall_kernel (rn, rib); } |