summaryrefslogtreecommitdiff
path: root/bgpd/bgp_packet.c
diff options
context:
space:
mode:
Diffstat (limited to 'bgpd/bgp_packet.c')
-rw-r--r--bgpd/bgp_packet.c158
1 files changed, 87 insertions, 71 deletions
diff --git a/bgpd/bgp_packet.c b/bgpd/bgp_packet.c
index fc653e21..f5a74d1b 100644
--- a/bgpd/bgp_packet.c
+++ b/bgpd/bgp_packet.c
@@ -206,7 +206,7 @@ bgp_update_packet (struct peer *peer, afi_t afi, safi_t safi)
/* Synchnorize attribute. */
if (adj->attr)
- bgp_attr_unintern (adj->attr);
+ bgp_attr_unintern (&adj->attr);
else
peer->scount[afi][safi]++;
@@ -598,7 +598,6 @@ bgp_write (struct thread *thread)
struct stream *s;
int num;
unsigned int count = 0;
- int write_errno;
/* Yes first of all get peer pointer. */
peer = THREAD_ARG (thread);
@@ -611,46 +610,37 @@ bgp_write (struct thread *thread)
return 0;
}
- /* Nonblocking write until TCP output buffer is full. */
- while (1)
+ s = bgp_write_packet (peer);
+ if (!s)
+ return 0; /* nothing to send */
+
+ sockopt_cork (peer->fd, 1);
+
+ /* Nonblocking write until TCP output buffer is full. */
+ do
{
int writenum;
- int val;
-
- s = bgp_write_packet (peer);
- if (! s)
- return 0;
-
- /* XXX: FIXME, the socket should be NONBLOCK from the start
- * status shouldnt need to be toggled on each write
- */
- val = fcntl (peer->fd, F_GETFL, 0);
- fcntl (peer->fd, F_SETFL, val|O_NONBLOCK);
/* Number of bytes to be sent. */
writenum = stream_get_endp (s) - stream_get_getp (s);
/* Call write() system call. */
num = write (peer->fd, STREAM_PNT (s), writenum);
- write_errno = errno;
- fcntl (peer->fd, F_SETFL, val);
- if (num <= 0)
+ if (num < 0)
{
- /* Partial write. */
- if (write_errno == EWOULDBLOCK || write_errno == EAGAIN)
- break;
+ /* write failed either retry needed or error */
+ if (ERRNO_IO_RETRY(errno))
+ break;
- BGP_EVENT_ADD (peer, TCP_fatal_error);
+ BGP_EVENT_ADD (peer, TCP_fatal_error);
return 0;
}
+
if (num != writenum)
{
+ /* Partial write */
stream_forward_getp (s, num);
-
- if (write_errno == EAGAIN)
- break;
-
- continue;
+ break;
}
/* Retrieve BGP packet type. */
@@ -691,13 +681,14 @@ bgp_write (struct thread *thread)
/* OK we send packet so delete it. */
bgp_packet_delete (peer);
-
- if (++count >= BGP_WRITE_PACKET_MAX)
- break;
}
+ while (++count < BGP_WRITE_PACKET_MAX &&
+ (s = bgp_write_packet (peer)) != NULL);
if (bgp_write_proceed (peer))
BGP_WRITE_ON (peer->t_write, bgp_write, peer->fd);
+ else
+ sockopt_cork (peer->fd, 0);
return 0;
}
@@ -706,7 +697,7 @@ bgp_write (struct thread *thread)
static int
bgp_write_notify (struct peer *peer)
{
- int ret;
+ int ret, val;
u_char type;
struct stream *s;
@@ -716,7 +707,10 @@ bgp_write_notify (struct peer *peer)
return 0;
assert (stream_get_endp (s) >= BGP_HEADER_SIZE);
- /* I'm not sure fd is writable. */
+ /* Put socket in blocking mode. */
+ val = fcntl (peer->fd, F_GETFL, 0);
+ fcntl (peer->fd, F_SETFL, val & ~O_NONBLOCK);
+
ret = writen (peer->fd, STREAM_DATA (s), stream_get_endp (s));
if (ret <= 0)
{
@@ -1602,26 +1596,47 @@ bgp_update_receive (struct peer *peer, bgp_size_t size)
BGP_NOTIFY_UPDATE_MAL_ATTR);
return -1;
}
+
+ /* Certain attribute parsing errors should not be considered bad enough
+ * to reset the session for, most particularly any partial/optional
+ * attributes that have 'tunneled' over speakers that don't understand
+ * them. Instead we withdraw only the prefix concerned.
+ *
+ * Complicates the flow a little though..
+ */
+ bgp_attr_parse_ret_t attr_parse_ret = BGP_ATTR_PARSE_PROCEED;
+ /* This define morphs the update case into a withdraw when lower levels
+ * have signalled an error condition where this is best.
+ */
+#define NLRI_ATTR_ARG (attr_parse_ret != BGP_ATTR_PARSE_WITHDRAW ? &attr : NULL)
/* Parse attribute when it exists. */
if (attribute_len)
{
- ret = bgp_attr_parse (peer, &attr, attribute_len,
+ attr_parse_ret = bgp_attr_parse (peer, &attr, attribute_len,
&mp_update, &mp_withdraw);
- if (ret < 0)
+ if (attr_parse_ret == BGP_ATTR_PARSE_ERROR)
return -1;
}
-
+
/* Logging the attribute. */
- if (BGP_DEBUG (update, UPDATE_IN))
+ if (attr_parse_ret == BGP_ATTR_PARSE_WITHDRAW
+ || BGP_DEBUG (update, UPDATE_IN))
{
ret= bgp_dump_attr (peer, &attr, attrstr, BUFSIZ);
+ int lvl = (attr_parse_ret == BGP_ATTR_PARSE_WITHDRAW)
+ ? LOG_ERR : LOG_DEBUG;
+
+ if (attr_parse_ret == BGP_ATTR_PARSE_WITHDRAW)
+ zlog (peer->log, LOG_ERR,
+ "%s rcvd UPDATE with errors in attr(s)!! Withdrawing route.",
+ peer->host);
if (ret)
- zlog (peer->log, LOG_DEBUG, "%s rcvd UPDATE w/ attr: %s",
+ zlog (peer->log, lvl, "%s rcvd UPDATE w/ attr: %s",
peer->host, attrstr);
}
-
+
/* Network Layer Reachability Information. */
update_len = end - stream_pnt (s);
@@ -1630,7 +1645,12 @@ bgp_update_receive (struct peer *peer, bgp_size_t size)
/* Check NLRI packet format and prefix length. */
ret = bgp_nlri_sanity_check (peer, AFI_IP, stream_pnt (s), update_len);
if (ret < 0)
- return -1;
+ {
+ bgp_attr_unintern_sub (&attr);
+ if (attr.extra)
+ bgp_attr_extra_free (&attr);
+ return -1;
+ }
/* Set NLRI portion to structure. */
update.afi = AFI_IP;
@@ -1653,15 +1673,20 @@ bgp_update_receive (struct peer *peer, bgp_size_t size)
update. */
ret = bgp_attr_check (peer, &attr);
if (ret < 0)
- return -1;
+ {
+ bgp_attr_unintern_sub (&attr);
+ if (attr.extra)
+ bgp_attr_extra_free (&attr);
+ return -1;
+ }
- bgp_nlri_parse (peer, &attr, &update);
+ bgp_nlri_parse (peer, NLRI_ATTR_ARG, &update);
}
if (mp_update.length
&& mp_update.afi == AFI_IP
&& mp_update.safi == SAFI_UNICAST)
- bgp_nlri_parse (peer, &attr, &mp_update);
+ bgp_nlri_parse (peer, NLRI_ATTR_ARG, &mp_update);
if (mp_withdraw.length
&& mp_withdraw.afi == AFI_IP
@@ -1688,7 +1713,7 @@ bgp_update_receive (struct peer *peer, bgp_size_t size)
if (mp_update.length
&& mp_update.afi == AFI_IP
&& mp_update.safi == SAFI_MULTICAST)
- bgp_nlri_parse (peer, &attr, &mp_update);
+ bgp_nlri_parse (peer, NLRI_ATTR_ARG, &mp_update);
if (mp_withdraw.length
&& mp_withdraw.afi == AFI_IP
@@ -1718,7 +1743,7 @@ bgp_update_receive (struct peer *peer, bgp_size_t size)
if (mp_update.length
&& mp_update.afi == AFI_IP6
&& mp_update.safi == SAFI_UNICAST)
- bgp_nlri_parse (peer, &attr, &mp_update);
+ bgp_nlri_parse (peer, NLRI_ATTR_ARG, &mp_update);
if (mp_withdraw.length
&& mp_withdraw.afi == AFI_IP6
@@ -1747,7 +1772,7 @@ bgp_update_receive (struct peer *peer, bgp_size_t size)
if (mp_update.length
&& mp_update.afi == AFI_IP6
&& mp_update.safi == SAFI_MULTICAST)
- bgp_nlri_parse (peer, &attr, &mp_update);
+ bgp_nlri_parse (peer, NLRI_ATTR_ARG, &mp_update);
if (mp_withdraw.length
&& mp_withdraw.afi == AFI_IP6
@@ -1775,7 +1800,7 @@ bgp_update_receive (struct peer *peer, bgp_size_t size)
if (mp_update.length
&& mp_update.afi == AFI_IP
&& mp_update.safi == SAFI_MPLS_LABELED_VPN)
- bgp_nlri_parse_vpnv4 (peer, &attr, &mp_update);
+ bgp_nlri_parse_vpnv4 (peer, NLRI_ATTR_ARG, &mp_update);
if (mp_withdraw.length
&& mp_withdraw.afi == AFI_IP
@@ -1797,21 +1822,10 @@ bgp_update_receive (struct peer *peer, bgp_size_t size)
/* Everything is done. We unintern temporary structures which
interned in bgp_attr_parse(). */
- if (attr.aspath)
- aspath_unintern (attr.aspath);
- if (attr.community)
- community_unintern (attr.community);
+ bgp_attr_unintern_sub (&attr);
if (attr.extra)
- {
- if (attr.extra->ecommunity)
- ecommunity_unintern (attr.extra->ecommunity);
- if (attr.extra->cluster)
- cluster_unintern (attr.extra->cluster);
- if (attr.extra->transit)
- transit_unintern (attr.extra->transit);
- bgp_attr_extra_free (&attr);
- }
-
+ bgp_attr_extra_free (&attr);
+
/* If peering is stopped due to some reason, do not generate BGP
event. */
if (peer->status != Established)
@@ -2028,7 +2042,7 @@ bgp_route_refresh_receive (struct peer *peer, bgp_size_t size)
* as possible without going beyond the bounds of the entry,
* to maximise debug information.
*/
- int ok ;
+ int ok;
memset (&orfp, 0, sizeof (struct orf_prefix));
common = *p_pnt++;
/* after ++: p_pnt <= p_end */
@@ -2042,9 +2056,9 @@ bgp_route_refresh_receive (struct peer *peer, bgp_size_t size)
ok = ((p_end - p_pnt) >= sizeof(u_int32_t)) ;
if (ok)
{
- memcpy (&seq, p_pnt, sizeof (u_int32_t));
- p_pnt += sizeof (u_int32_t);
- orfp.seq = ntohl (seq);
+ memcpy (&seq, p_pnt, sizeof (u_int32_t));
+ p_pnt += sizeof (u_int32_t);
+ orfp.seq = ntohl (seq);
}
else
p_pnt = p_end ;
@@ -2082,16 +2096,17 @@ bgp_route_refresh_receive (struct peer *peer, bgp_size_t size)
inet_ntop (orfp.p.family, &orfp.p.u.prefix, buf, BUFSIZ),
orfp.p.prefixlen, orfp.ge, orfp.le,
ok ? "" : " MALFORMED");
-
+
if (ok)
- ret = prefix_bgp_orf_set (name, afi, &orfp,
- (common & ORF_COMMON_PART_DENY ? 0 : 1 ),
- (common & ORF_COMMON_PART_REMOVE ? 0 : 1));
+ ret = prefix_bgp_orf_set (name, afi, &orfp,
+ (common & ORF_COMMON_PART_DENY ? 0 : 1 ),
+ (common & ORF_COMMON_PART_REMOVE ? 0 : 1));
if (!ok || (ret != CMD_SUCCESS))
{
if (BGP_DEBUG (normal, NORMAL))
- zlog_debug ("%s Received misformatted prefixlist ORF. Remove All pfxlist", peer->host);
+ zlog_debug ("%s Received misformatted prefixlist ORF."
+ " Remove All pfxlist", peer->host);
prefix_bgp_orf_remove_all (name);
break;
}
@@ -2276,12 +2291,13 @@ bgp_read_packet (struct peer *peer)
return 0;
/* Read packet from fd. */
- nbytes = stream_read_unblock (peer->ibuf, peer->fd, readsize);
+ nbytes = stream_read_try (peer->ibuf, peer->fd, readsize);
/* If read byte is smaller than zero then error occured. */
if (nbytes < 0)
{
- if (errno == EAGAIN)
+ /* Transient error should retry */
+ if (nbytes == -2)
return -1;
plog_err (peer->log, "%s [Error] bgp_read_packet error: %s",