The following patch, created by Marco Munari, is provided ``as is''.

A brief documentation (by Marco Munari):

``RFC 1323, TCP Extensions for High Performance, Appendix A'' contains a
recommendation that OpenBSD respects, but which is not applied optimally
in SYN packets as well:

[ Network Working Group                                    V. Jacobson
[ Request for Comments: 1323                                       LBL
[ Obsoletes: RFC 1072, RFC 1185                              R. Braden
[                                                                  ISI
[                                                            D. Borman
[                                                        Cray Research
[                                                             May 1992
[
[
[             TCP Extensions for High Performance
...
[ APPENDIX A:  IMPLEMENTATION SUGGESTIONS
[
]   The following layouts are recommended for sending options on non-SYN
]   segments, to achieve maximum feasible alignment of 32-bit and 64-bit
]   machines.
]
]
]                   +--------+--------+--------+--------+
]                   |   NOP  |  NOP   |  TSopt |   10   |
]                   +--------+--------+--------+--------+
]                   |          TSval   timestamp        |
]                   +--------+--------+--------+--------+
]                   |          TSecr   timestamp        |
]                   +--------+--------+--------+--------+

I think that the NOP aspect of ``Appendix A'' is not to be recommended:
the presence of the NOP options is only valid as an example, while the
alignment of the timestamp is the aspect that really deserves to be
recommended. The layout, equivalent in efficiency, should be:

[The ``octets binary network addresses'' are specified as a pattern for
each column in the headline]

  ...00    ...01    ...10    ...11
+--------+--------+--------+--------+
| . . .  | . . .  |  TSopt |   10   |
+--------+--------+--------+--------+   network address +100(=+4)
|          TSval   timestamp        |
+--------+--------+--------+--------+
|          TSecr   timestamp        |
+--------+--------+--------+--------+

Where the unspecified part could be efficiently filled with (in order of
preference): one option 2 octets in length, two options 1 octet in length
each, one option 1 octet in length followed by a NOP (0x01), or a double
NOP. So, in non-SYN packets the preference for NOP as alignment padding
is appropriate, because the TS is typically the only option.
Machines with a minimum access width of 32 bits just have to filter out
the NOPs (or other options) in the alignment position with one bitwise
operation.
The agreeable part of Appendix A is that it avoids the following impolite
alignment (in theory permitted by TCP):

+--------+--------+--------+--------+
|  TSopt |   10   |  TSval timestampH..
+--------+--------+--------+--------+
..TSval timestampL| TSecr timestampH..
+--------+--------+--------+--------+
..TSecr timestampL|
+--------+--------+

which is 16 bits shorter, but in an expensive way.

The proof is in the code: my modification to an operating system that
respects the appendix. (The result is more efficient on architectures
that also support 16-bit access, because it accesses 16-bit pieces of
memory instead of 32-bit ones; 4 fewer bytes are sent to the network in
each SYN packet; and it is more interoperable, because it identifies the
TSopt header by the 16-bit value of the TSopt header.)

I applied it to OpenBSD, CVS from :ext:anoncvs@anoncvs.ca.openbsd.org:/cvs

$ grep openbsd /home/mar/.ssh/known_hosts
anoncvs.ca.openbsd.org,129.128.5.191 ssh-rsa AAAAB3NzaC1yc2EAAAABIwAAAIEAsQpVyGYI7vjnNUfWBSQe2jq9Fdgv/S4/yvBSIcRhPpuyPeUlNxLf9Vey9paxbowhcCyu+xk/Mwz+L15UPg9If2PYN0NG7+ayNqTpS+eP6bE6rbqtCdFSBEM9zRuZUln14kGwSgJYQqcT/qDt80Ro8Z+zSh9MCQuLbIrspSKYx88=

A new TCP signature will result; you can record it as follows:

16384:64:1:60:M*,N,W0,S,T: OpenBSD:3.8:mm-obsd:OpenBSD 3.8 (MunARi)

--- OpenBSD/sys/netinet/tcp_output.c 05 Jul 2005 10:40:54 +0100 1.79 +++ sys/netinet/tcp_output.c 14 Jan 2006 04:11:24 +0000 @@ -68,6 +68,28 @@ * Research Laboratory (NRL). */ + +/* + * Additional improvements + * copyright (c) 2005, 2006 by Marco Munari + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of allerta nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * SOURCE CODE PROVIDED ``AS IS'' WITHOUT WARRANTIES, BUT WITH HAPPY FUN + * AND A SORT OF LOVE. + * + */ + #include #include #include @@ -105,6 +126,7 @@ #ifdef TCP_SACK extern int tcprexmtthresh; #endif +int stdbsdtcp=1; #ifdef TCP_SACK #ifdef TCP_SACK_DEBUG @@ -139,17 +161,16 @@ return (NULL); p = tp->snd_holes; while (p) { -#ifndef TCP_FACK - if (p->dups >= tcprexmtthresh && SEQ_LT(p->rxmit, p->end)) { -#else /* In FACK, if p->dups is less than tcprexmtthresh, but * snd_fack advances more than tcprextmtthresh * tp->t_maxseg, * tcp_input() will try fast retransmit. This forces output. */ - if ((p->dups >= tcprexmtthresh || - tp->t_dupacks == tcprexmtthresh) && - SEQ_LT(p->rxmit, p->end)) { + if ((p->dups >= tcprexmtthresh +#ifdef TCP_FACK + || tp->t_dupacks == tcprexmtthresh #endif /* TCP_FACK */ + ) && SEQ_LT(p->rxmit, p->end)) { + if (SEQ_LT(p->rxmit, tp->snd_una)) {/* old SACK hole */ p = p->next; continue; @@ -216,7 +237,9 @@ u_char opt[MAX_TCPOPTLEN]; unsigned int optlen, hdrlen, packetlen; int idle, sendalot = 0; + int tstmp_cond; /* cache a complex condition now used twice */ #ifdef TCP_SACK + int tsack_cond = 0; int i, sack_rxmit = 0; struct sackhole *p; #endif @@ -540,6 +563,11 @@ return (EPFNOSUPPORT); } + tstmp_cond = ((tp->t_flags & (TF_REQ_TSTMP|TF_NOOPT)) == TF_REQ_TSTMP && + (flags & TH_RST) == 0 && + ((flags & (TH_SYN|TH_ACK)) == TH_SYN || + (tp->t_flags & TF_RCVD_TSTMP))); + if (flags & TH_SYN) { tp->snd_nxt = tp->iss; if ((tp->t_flags & TF_NOOPT) == 0) { @@ -561,10 +589,14 @@ * already done so. 
*/ if (tp->sack_enable && ((flags & TH_ACK) == 0 || - (tp->t_flags & TF_SACK_PERMIT))) { - *((u_int32_t *) (opt + optlen)) = - htonl(TCPOPT_SACK_PERMIT_HDR); - optlen += 4; + (tp->t_flags & TF_SACK_PERMIT))) { + if (stdbsdtcp || !tstmp_cond) { + *((u_int32_t *) (opt + optlen)) = + htonl(TCPOPT_SACK_PERMIT_HDR); + optlen += 4; + } else + /* sack will be packed in timestamp */ + tsack_cond=1; } #endif @@ -586,16 +618,26 @@ * wants to use timestamps (TF_REQ_TSTMP is set) or both our side * and our peer have sent timestamps in our SYN's. */ - if ((tp->t_flags & (TF_REQ_TSTMP|TF_NOOPT)) == TF_REQ_TSTMP && - (flags & TH_RST) == 0 && - ((flags & (TH_SYN|TH_ACK)) == TH_SYN || - (tp->t_flags & TF_RCVD_TSTMP))) { + if (tstmp_cond) { u_int32_t *lp = (u_int32_t *)(opt + optlen); + *lp = htonl(TCPOPT_TSTAMP_HDR); /* Form timestamp option as shown in appendix A of RFC 1323. */ - *lp++ = htonl(TCPOPT_TSTAMP_HDR); - *lp++ = htonl(tcp_now + tp->ts_modulate); - *lp = htonl(tp->ts_recent); +#ifdef TCP_SACK + /* if needed sackOK as before but not just done */ + if (tsack_cond) { +#if 1 + *(u_int16_t*)lp = htons( TCPOPT_SACK_PERMITTED<<8| + TCPOLEN_SACK_PERMITTED ); +#else /* two similar ways, i preferre the first (above) */ + *lp &= htonl(0x0000ffff); + *lp |= htonl( TCPOPT_SACK_PERMITTED<<24| + TCPOLEN_SACK_PERMITTED<<16 ); +#endif + } +#endif + *++lp = htonl(tcp_now + tp->ts_modulate); + *++lp = htonl(tp->ts_recent); optlen += TCPOLEN_TSTAMP_APPA; } --- OpenBSD/sys/netinet/tcp_input.c 07 Dec 2005 03:31:38 +0000 1.194 +++ sys/netinet/tcp_input.c 11 Jan 2006 04:20:26 +0000 @@ -68,6 +68,27 @@ * Research Laboratory (NRL). */ + +/* + * Additional improvements + * copyright (c) 2005, 2006 by Marco Munari + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of allerta nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * SOURCE CODE PROVIDED ``AS IS'' WITHOUT WARRANTIES, BUT WITH CARE. + * + */ + #include #include #include @@ -112,6 +133,7 @@ int tcptv_keep_init = TCPTV_KEEP_INIT; extern u_long sb_max; +extern int stdbsdtcp; int tcp_rst_ppslim = 100; /* 100pps */ int tcp_rst_ppslim_count = 0; @@ -565,11 +587,18 @@ * formatted as recommended in RFC 1323 appendix A, we * quickly get the values now and not bother calling * tcp_dooptions(), etc. + * MARco doesn't recommend only the NOP aspect of appendix A, + * (u_int16_t should be appropriate and more efficient on most + * arch/ efficient enough on only-32-or-more bits arch.s, + * in addition to the 4 network bytes overload in each packet + * carring TCP options (with SackOK(len 2) and TIMESTAMP(len 10)) + * before the default was a _double_ NOP NOP use) */ if ((optlen == TCPOLEN_TSTAMP_APPA || (optlen > TCPOLEN_TSTAMP_APPA && optp[TCPOLEN_TSTAMP_APPA] == TCPOPT_EOL)) && - *(u_int32_t *)optp == htonl(TCPOPT_TSTAMP_HDR) && + *((u_int16_t *)optp + 1) == htons(TCPOPT_TIMESTAMP<<8 + |TCPOLEN_TIMESTAMP) && (th->th_flags & TH_SYN) == 0) { opti.ts_present = 1; opti.ts_val = ntohl(*(u_int32_t *)(optp + 4)); @@ -4141,7 +4170,8 @@ /* Compute the size of the TCP options. */ optlen = 4 + (sc->sc_request_r_scale != 15 ? 4 : 0) + #ifdef TCP_SACK - ((sc->sc_flags & SCF_SACK_PERMIT) ? 4 : 0) + + ((sc->sc_flags & SCF_SACK_PERMIT && + (stdbsdtcp || !(sc->sc_flags & SCF_TIMESTAMP))) ? 
4 : 0) + #endif #ifdef TCP_SIGNATURE ((sc->sc_flags & SCF_SIGNATURE) ? TCPOLEN_SIGLEN : 0) + @@ -4223,27 +4253,31 @@ *optp++ = sc->sc_ourmaxseg & 0xff; #ifdef TCP_SACK - /* Include SACK_PERMIT_HDR option if peer has already done so. */ - if (sc->sc_flags & SCF_SACK_PERMIT) { - *((u_int32_t *)optp) = htonl(TCPOPT_SACK_PERMIT_HDR); - optp += 4; - } + /* Include SACK_PERMIT_HDR option if peer has already done so. + * do here only if sackOK can't be packed in timestamp block */ + if ( sc->sc_flags & SCF_SACK_PERMIT && + (stdbsdtcp || !(sc->sc_flags & SCF_TIMESTAMP)) ) + *(((u_int32_t *)optp)++) = htonl(TCPOPT_SACK_PERMIT_HDR); #endif - if (sc->sc_request_r_scale != 15) { - *((u_int32_t *)optp) = htonl(TCPOPT_NOP << 24 | - TCPOPT_WINDOW << 16 | TCPOLEN_WINDOW << 8 | - sc->sc_request_r_scale); - optp += 4; - } + if (sc->sc_request_r_scale != 15) + *(((u_int32_t *)optp)++) = htonl(TCPOPT_NOP << 24 | + TCPOPT_WINDOW << 16 | TCPOLEN_WINDOW << 8 | + sc->sc_request_r_scale); if (sc->sc_flags & SCF_TIMESTAMP) { - u_int32_t *lp = (u_int32_t *)(optp); + u_int32_t *lp = (u_int32_t *)optp; /* Form timestamp option as shown in appendix A of RFC 1323. 
*/ - *lp++ = htonl(TCPOPT_TSTAMP_HDR); + *lp = htonl(TCPOPT_TSTAMP_HDR); +#ifdef TCP_SACK + if (!stdbsdtcp && sc->sc_flags & SCF_SACK_PERMIT) + /* set SACK_PERMIT option insted of NOP NOP */ + *(u_int16_t*)lp = htons( TCPOPT_SACK_PERMITTED<<8| + TCPOLEN_SACK_PERMITTED ); +#endif sc->sc_modulate = arc4random(); - *lp++ = htonl(SYN_CACHE_TIMESTAMP(sc)); - *lp = htonl(sc->sc_timestamp); + *++lp = htonl(SYN_CACHE_TIMESTAMP(sc)); + *++lp = htonl(sc->sc_timestamp); optp += TCPOLEN_TSTAMP_APPA; } --- OpenBSD/sys/netinet/tcp.h 12 Dec 2005 05:06:57 +0000 1.16 +++ sys/netinet/tcp.h 09 Jan 2006 04:41:03 +0000 @@ -32,6 +32,12 @@ * @(#)tcp.h 8.1 (Berkeley) 6/10/93 */ +/* + * improvements 2006 by Marco Munari + * available to canadian OpenBSD + */ + + #ifndef _NETINET_TCP_H_ #define _NETINET_TCP_H_ @@ -63,6 +69,7 @@ #define TH_URG 0x20 #define TH_ECE 0x40 #define TH_CWR 0x80 +#define TH_BITS "\20\2SYN\5ACK\1FIN\3RST\4PUSH\6URG\7ECE\10CWR" u_int16_t th_win; /* window */ u_int16_t th_sum; /* checksum */ u_int16_t th_urp; /* urgent pointer */ @@ -93,6 +100,8 @@ (TCPOPT_NOP<<24|TCPOPT_NOP<<16|TCPOPT_TIMESTAMP<<8|TCPOLEN_TIMESTAMP) /* Option definitions */ + + /* Warning: redundant with NOP, use it only in writing */ #define TCPOPT_SACK_PERMIT_HDR \ (TCPOPT_NOP<<24|TCPOPT_NOP<<16|TCPOPT_SACK_PERMITTED<<8|TCPOLEN_SACK_PERMITTED) #define TCPOPT_SACK_HDR (TCPOPT_NOP<<24|TCPOPT_NOP<<16|TCPOPT_SACK<<8) --- OpenBSD/etc/pf.os 25 May 2005 09:15:12 +0100 1.19 +++ etc/pf.os 24 Jan 2006 07:27:45 +0000 @@ -304,6 +304,7 @@ 57344:64:0:64:M*,N,N,S,N,W0,N,N,T: OpenBSD:3.3-3.7:no-df:OpenBSD 3.3-3.7 (scrub no-df) 65535:64:1:64:M*,N,N,S,N,W0,N,N,T: OpenBSD:3.0-3.7:opera:OpenBSD 3.0-3.7 (Opera) +16384:64:1:60:M*,N,W0,S,T: OpenBSD:3.8:mm-obsd:OpenBSD 3.8 (MunARi) # ----------------- Solaris -----------------