diff --git a/CHANGES.html b/CHANGES.html
index bf927dc3a..09646ab32 100644
--- a/CHANGES.html
+++ b/CHANGES.html
@@ -57,6 +57,7 @@ us a note on ns-developers mailing list.
A new TCP congestion control, TcpLinuxReno, has been added.
Added, to PIE queue disc, queue delay calculation using timestamp feature (Linux default behavior), cap drop adjustment feature (Section 5.5 of RFC 8033), ECN (Section 5.1 of RFC 8033) and derandomization feature (Section 5.4 of RFC 8033).
Added L4S Mode to FqCoDel and CoDel queue discs
+
A model for dynamic pacing has been added to TCP.
Changes to existing API:
@@ -73,8 +74,9 @@ by including flow-monitor.h you will need to change that to stats-module.h.
Changed behavior:
Support for RIFS has been dropped from wifi. RIFS has been obsoleted by the 802.11 standard and support for it was not implemented according to the standard.
-
The behavior of TcpPrrRecovery algorithm was aligned to that of Linux.
+
The behavior of TcpPrrRecovery algorithm was aligned to that of Linux.
PIE queue disc now uses Timestamp for queue delay calculation as default instead of Dequeue Rate Estimator
+
TCP pacing, when enabled, now adjusts the rate dynamically based on the window size, rather than just enforcing a constant rate.
diff --git a/RELEASE_NOTES b/RELEASE_NOTES
index 21e923bae..2e3539973 100644
--- a/RELEASE_NOTES
+++ b/RELEASE_NOTES
@@ -22,6 +22,7 @@ New user-visible features
(Section 5.5 of RFC 8033), ECN (Section 5.1 of RFC 8033) and derandomization
feature (Section 5.4 of RFC 8033).
- (traffic-control) Add support for L4S mode to CoDel and FqCoDel queue discs
+- (tcp) Support for dynamic pacing in TCP.
Bugs fixed
----------
diff --git a/examples/tcp/tcp-pacing.cc b/examples/tcp/tcp-pacing.cc
index 6a148f2a7..969a23643 100644
--- a/examples/tcp/tcp-pacing.cc
+++ b/examples/tcp/tcp-pacing.cc
@@ -1,6 +1,6 @@
/* -*- Mode:C++; c-file-style:"gnu"; indent-tabs-mode:nil; -*- */
/*
- * Copyright (c) 2017 NITK Surathkal
+ * Copyright (c) 2020 NITK Surathkal
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -15,21 +15,72 @@
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
- * Author: Vivek Jain
+ * Authors: Vivek Jain
+ * Deepak Kumaraswamy
*/
-// Network topology
+// The following network topology is used in this example, and is taken from
+// Figure 2 of https://homes.cs.washington.edu/~tom/pubs/pacing.pdf
+//
+// n0 n4
+// | |
+// |(4x Mbps, 5ms) |(4x Mbps, 5ms)
+// | |
+// | |
+// | (x Mbps, 40ms) |
+// n2 ------------------------ n3
+// | |
+// | |
+// |(4x Mbps, 5ms) |(4x Mbps, 5ms)
+// | |
+// n1 n5
+//
//
-// n0 ----------- n1
-// 40 Gbps
-// 0.01 ms
-// This programs illustrates how TCP pacing can be used and how user can set
-// pacing rate. The program gives information about each flow like transmitted
-// and received bytes (packets) and throughput of that flow. Currently, it is
-// using TCP NewReno but in future after having congestion control algorithms
-// which can change pacing rate can be used.
+// This example illustrates how TCP pacing can be enabled on a socket.
+// Two long-running TCP flows are instantiated at nodes n0 and n1 to
+// send data over a bottleneck link (n2->n3) to sink nodes n4 and n5.
+// At the end of the simulation, the IP-level flow monitor tool will
+// print out summary statistics of the flows. The flow monitor detects
+// four flows, but that is because the flow records are unidirectional;
+// the latter two flows reported are actually ack streams.
+//
+// At the end of this simulation, data files are also generated
+// that track changes in Congestion Window, Slow Start threshold and
+// TCP pacing rate for the first flow (n0). Additionally, a data file
+// that contains information about packet transmission and reception times
+// (collected through TxTrace and RxTrace respectively) is also produced.
+// This transmission and reception (ack) trace is the most direct way to
+// observe the effects of pacing. All the above information is traced
+// just for the single node n0.
+//
+// A small amount of randomness is introduced to the program to control
+// the start time of the flows.
+//
+// This example has pacing enabled by default, which means that TCP
+// does not send packets back-to-back, but instead paces them out over
+// an RTT. The size of initial congestion window is set to 10, and pacing
+// of the initial window is enabled. The available command-line options and
+// their default values can be observed in the usual way by running the
+// program to print the help info; i.e.: ./waf --run 'tcp-pacing --PrintHelp'
+//
+// When pacing is disabled, TCP sends eligible packets back-to-back. The
+// differences in behaviour when pacing is disabled can be observed from the
+// packet transmission data file. For instance, one can observe that
+// packets in the initial window are sent back-to-back at the same instant,
+// without any inter-packet gaps. Another instance is when n0 receives a
+// packet in the form of an acknowledgement, and sends out data packets without
+// pacing them.
+//
+// Although this example serves as a useful demonstration of how pacing could
+// be enabled/disabled in ns-3 TCP congestion controls, we could not observe
+// significant improvements in throughput for the above topology when pacing
+// was enabled. In future, one could try and incorporate models such as
+// TCP Prague and ACK-filtering, which may show a stronger performance
+// impact for TCP pacing.
+#include <iostream>
+#include <iomanip>
#include <string>
#include <fstream>
#include "ns3/core-module.h"
@@ -39,100 +90,219 @@
#include "ns3/network-module.h"
#include "ns3/packet-sink.h"
#include "ns3/flow-monitor-module.h"
+#include "ns3/ipv4-global-routing-helper.h"
+#include "ns3/traffic-control-module.h"
using namespace ns3;
NS_LOG_COMPONENT_DEFINE ("TcpPacingExample");
+std::ofstream cwndStream;
+std::ofstream pacingRateStream;
+std::ofstream ssThreshStream;
+std::ofstream packetTraceStream;
+
+static void
+CwndTracer (uint32_t oldval, uint32_t newval)
+{
+ cwndStream << std::fixed << std::setprecision (6) << Simulator::Now ().GetSeconds () << std::setw (12) << newval << std::endl;
+}
+
+static void
+PacingRateTracer (DataRate oldval, DataRate newval)
+{
+ pacingRateStream << std::fixed << std::setprecision (6) << Simulator::Now ().GetSeconds () << std::setw (12) << newval.GetBitRate () / 1e6 << std::endl;
+}
+
+static void
+SsThreshTracer (uint32_t oldval, uint32_t newval)
+{
+ ssThreshStream << std::fixed << std::setprecision (6) << Simulator::Now ().GetSeconds () << std::setw (12) << newval << std::endl;
+}
+
+static void
+TxTracer (Ptr<const Packet> p, Ptr<Ipv4> ipv4, uint32_t interface)
+{
+ packetTraceStream << std::fixed << std::setprecision (6) << Simulator::Now ().GetSeconds () << " tx " << p->GetSize () << std::endl;
+}
+
+static void
+RxTracer (Ptr<const Packet> p, Ptr<Ipv4> ipv4, uint32_t interface)
+{
+ packetTraceStream << std::fixed << std::setprecision (6) << Simulator::Now ().GetSeconds () << " rx " << p->GetSize () << std::endl;
+}
+
+void
+ConnectSocketTraces (void)
+{
+ Config::ConnectWithoutContext ("/NodeList/0/$ns3::TcpL4Protocol/SocketList/0/CongestionWindow", MakeCallback (&CwndTracer));
+ Config::ConnectWithoutContext ("/NodeList/0/$ns3::TcpL4Protocol/SocketList/0/PacingRate", MakeCallback (&PacingRateTracer));
+ Config::ConnectWithoutContext ("/NodeList/0/$ns3::TcpL4Protocol/SocketList/0/SlowStartThreshold", MakeCallback (&SsThreshTracer));
+ Config::ConnectWithoutContext ("/NodeList/0/$ns3::Ipv4L3Protocol/Tx", MakeCallback (&TxTracer));
+ Config::ConnectWithoutContext ("/NodeList/0/$ns3::Ipv4L3Protocol/Rx", MakeCallback (&RxTracer));
+}
+
int
main (int argc, char *argv[])
{
-
bool tracing = false;
- uint32_t maxBytes = 0;
- uint32_t TCPFlows = 1;
+
+ uint32_t maxBytes = 0; // value of zero corresponds to unlimited send
+ std::string transportProtocol = "ns3::TcpNewReno";
+
+ Time simulationEndTime = Seconds (5);
+ DataRate bottleneckBandwidth ("10Mbps"); // value of x as shown in the above network topology
+ Time bottleneckDelay = MilliSeconds (40);
+ DataRate regLinkBandwidth = DataRate (4 * bottleneckBandwidth.GetBitRate ());
+ Time regLinkDelay = MilliSeconds (5);
+ DataRate maxPacingRate ("4Gbps");
+
bool isPacingEnabled = true;
- std::string pacingRate = "4Gbps";
- bool isSack = false;
- uint32_t maxPackets = 0;
+ bool useEcn = true;
+ bool useQueueDisc = true;
+ bool shouldPaceInitialWindow = true;
+
+ // Configure defaults that are not based on explicit command-line arguments
+ // They may be overridden by general attribute configuration of command line
+ Config::SetDefault ("ns3::TcpL4Protocol::SocketType", TypeIdValue (TypeId::LookupByName (transportProtocol)));
+ Config::SetDefault ("ns3::TcpSocket::InitialCwnd", UintegerValue (10));
CommandLine cmd (__FILE__);
- cmd.AddValue ("tracing", "Flag to enable/disable tracing", tracing);
- cmd.AddValue ("maxBytes",
- "Total number of bytes for application to send", maxBytes);
- cmd.AddValue ("maxPackets",
- "Total number of bytes for application to send", maxPackets);
- cmd.AddValue ("TCPFlows", "Number of application flows between sender and receiver", TCPFlows);
- cmd.AddValue ("Pacing", "Flag to enable/disable pacing in TCP", isPacingEnabled);
- cmd.AddValue ("Sack", "Flag to enable/disable sack in TCP", isSack);
- cmd.AddValue ("PacingRate", "Max Pacing Rate in bps", pacingRate);
+ cmd.AddValue ("tracing", "Flag to enable/disable Ascii and Pcap tracing", tracing);
+ cmd.AddValue ("maxBytes", "Total number of bytes for application to send", maxBytes);
+ cmd.AddValue ("isPacingEnabled", "Flag to enable/disable pacing in TCP", isPacingEnabled);
+ cmd.AddValue ("maxPacingRate", "Max Pacing Rate", maxPacingRate);
+ cmd.AddValue ("useEcn", "Flag to enable/disable ECN", useEcn);
+ cmd.AddValue ("useQueueDisc", "Flag to enable/disable queue disc on bottleneck", useQueueDisc);
+ cmd.AddValue ("shouldPaceInitialWindow", "Flag to enable/disable pacing of TCP initial window", shouldPaceInitialWindow);
+ cmd.AddValue ("simulationEndTime", "Simulation end time", simulationEndTime);
cmd.Parse (argc, argv);
- if (maxPackets != 0 )
- {
- maxBytes = 500 * maxPackets;
- }
-
- Config::SetDefault ("ns3::TcpSocketState::MaxPacingRate", StringValue (pacingRate));
+ // Configure defaults based on command-line arguments
Config::SetDefault ("ns3::TcpSocketState::EnablePacing", BooleanValue (isPacingEnabled));
- Config::SetDefault ("ns3::TcpSocketBase::Sack", BooleanValue (isSack));
+ Config::SetDefault ("ns3::TcpSocketState::PaceInitialWindow", BooleanValue (shouldPaceInitialWindow));
+ Config::SetDefault ("ns3::TcpSocketBase::UseEcn", (useEcn ? EnumValue (TcpSocketState::On) : EnumValue (TcpSocketState::Off)));
+ Config::SetDefault ("ns3::TcpSocketState::MaxPacingRate", DataRateValue (maxPacingRate));
NS_LOG_INFO ("Create nodes.");
- NodeContainer nodes;
- nodes.Create (2);
+ NodeContainer c;
+ c.Create (6);
NS_LOG_INFO ("Create channels.");
- PointToPointHelper pointToPoint;
- pointToPoint.SetDeviceAttribute ("DataRate", StringValue ("40Gbps"));
- pointToPoint.SetChannelAttribute ("Delay", StringValue ("0.01ms"));
+ NodeContainer n0n2 = NodeContainer (c.Get (0), c.Get (2));
+ NodeContainer n1n2 = NodeContainer (c.Get (1), c.Get (2));
- NetDeviceContainer devices;
- devices = pointToPoint.Install (nodes);
+ NodeContainer n2n3 = NodeContainer (c.Get (2), c.Get (3));
- InternetStackHelper internet;
- internet.Install (nodes);
+ NodeContainer n3n4 = NodeContainer (c.Get (3), c.Get (4));
+ NodeContainer n3n5 = NodeContainer (c.Get (3), c.Get (5));
+
+ //Define Node link properties
+ PointToPointHelper regLink;
+ regLink.SetDeviceAttribute ("DataRate", DataRateValue (regLinkBandwidth));
+ regLink.SetChannelAttribute ("Delay", TimeValue (regLinkDelay));
+
+ NetDeviceContainer d0d2 = regLink.Install (n0n2);
+ NetDeviceContainer d1d2 = regLink.Install (n1n2);
+ NetDeviceContainer d3d4 = regLink.Install (n3n4);
+ NetDeviceContainer d3d5 = regLink.Install (n3n5);
+
+ PointToPointHelper bottleNeckLink;
+ bottleNeckLink.SetDeviceAttribute ("DataRate", DataRateValue (bottleneckBandwidth));
+ bottleNeckLink.SetChannelAttribute ("Delay", TimeValue (bottleneckDelay));
+
+ NetDeviceContainer d2d3 = bottleNeckLink.Install (n2n3);
+
+ //Install Internet stack
+ InternetStackHelper stack;
+ stack.Install (c);
+
+ // Install traffic control
+ if (useQueueDisc)
+ {
+ TrafficControlHelper tchBottleneck;
+ tchBottleneck.SetRootQueueDisc ("ns3::FqCoDelQueueDisc");
+ tchBottleneck.Install (d2d3);
+ }
NS_LOG_INFO ("Assign IP Addresses.");
Ipv4AddressHelper ipv4;
ipv4.SetBase ("10.1.1.0", "255.255.255.0");
- Ipv4InterfaceContainer i = ipv4.Assign (devices);
+ Ipv4InterfaceContainer regLinkInterface0 = ipv4.Assign (d0d2);
+
+ ipv4.SetBase ("10.1.2.0", "255.255.255.0");
+ Ipv4InterfaceContainer regLinkInterface1 = ipv4.Assign (d1d2);
+
+ ipv4.SetBase ("10.1.3.0", "255.255.255.0");
+ Ipv4InterfaceContainer bottleneckInterface = ipv4.Assign (d2d3);
+
+ ipv4.SetBase ("10.1.4.0", "255.255.255.0");
+ Ipv4InterfaceContainer regLinkInterface4 = ipv4.Assign (d3d4);
+
+ ipv4.SetBase ("10.1.5.0", "255.255.255.0");
+ Ipv4InterfaceContainer regLinkInterface5 = ipv4.Assign (d3d5);
+
+ Ipv4GlobalRoutingHelper::PopulateRoutingTables ();
NS_LOG_INFO ("Create Applications.");
- ApplicationContainer sourceApps;
- ApplicationContainer sinkApps;
- for (uint32_t iterator = 0; iterator < TCPFlows; iterator++)
- {
- uint16_t port = 10000 + iterator;
+ // Two Sink Applications at n4 and n5
+ uint16_t sinkPort = 8080;
+ Address sinkAddress4 (InetSocketAddress (regLinkInterface4.GetAddress (1), sinkPort)); // interface of n4
+ Address sinkAddress5 (InetSocketAddress (regLinkInterface5.GetAddress (1), sinkPort)); // interface of n5
+ PacketSinkHelper packetSinkHelper ("ns3::TcpSocketFactory", InetSocketAddress (Ipv4Address::GetAny (), sinkPort));
+ ApplicationContainer sinkApps4 = packetSinkHelper.Install (c.Get (4)); //n4 as sink
+ ApplicationContainer sinkApps5 = packetSinkHelper.Install (c.Get (5)); //n5 as sink
- BulkSendHelper source ("ns3::TcpSocketFactory",
- InetSocketAddress (i.GetAddress (1), port));
- // Set the amount of data to send in bytes. Zero is unlimited.
- source.SetAttribute ("MaxBytes", UintegerValue (maxBytes));
- sourceApps.Add (source.Install (nodes.Get (0)));
+ sinkApps4.Start (Seconds (0));
+ sinkApps4.Stop (simulationEndTime);
+ sinkApps5.Start (Seconds (0));
+ sinkApps5.Stop (simulationEndTime);
- PacketSinkHelper sink ("ns3::TcpSocketFactory",
- InetSocketAddress (Ipv4Address::GetAny (), port));
- sinkApps.Add (sink.Install (nodes.Get (1)));
- }
+ // Randomize the start time between 0 and 1ms
+ Ptr<UniformRandomVariable> uniformRv = CreateObject<UniformRandomVariable> ();
+ uniformRv->SetStream (0);
- sinkApps.Start (Seconds (0.0));
- sinkApps.Stop (Seconds (5));
- sourceApps.Start (Seconds (1));
- sourceApps.Stop (Seconds (5));
+ // Two Source Applications at n0 and n1
+ BulkSendHelper source0 ("ns3::TcpSocketFactory", sinkAddress4);
+ BulkSendHelper source1 ("ns3::TcpSocketFactory", sinkAddress5);
+ // Set the amount of data to send in bytes. Zero is unlimited.
+ source0.SetAttribute ("MaxBytes", UintegerValue (maxBytes));
+ source1.SetAttribute ("MaxBytes", UintegerValue (maxBytes));
+ ApplicationContainer sourceApps0 = source0.Install (c.Get (0));
+ ApplicationContainer sourceApps1 = source1.Install (c.Get (1));
+
+ sourceApps0.Start (MicroSeconds (uniformRv->GetInteger (0, 1000)));
+ sourceApps0.Stop (simulationEndTime);
+ sourceApps1.Start (MicroSeconds (uniformRv->GetInteger (0, 1000)));
+ sourceApps1.Stop (simulationEndTime);
if (tracing)
{
AsciiTraceHelper ascii;
- pointToPoint.EnableAsciiAll (ascii.CreateFileStream ("tcp-pacing.tr"));
- pointToPoint.EnablePcapAll ("tcp-pacing", false);
+ regLink.EnableAsciiAll (ascii.CreateFileStream ("tcp-dynamic-pacing.tr"));
+ regLink.EnablePcapAll ("tcp-dynamic-pacing", false);
}
+ cwndStream.open ("tcp-dynamic-pacing-cwnd.dat", std::ios::out);
+ cwndStream << "#Time(s) Congestion Window (B)" << std::endl;
+
+ pacingRateStream.open ("tcp-dynamic-pacing-pacing-rate.dat", std::ios::out);
+ pacingRateStream << "#Time(s) Pacing Rate (Mb/s)" << std::endl;
+
+ ssThreshStream.open ("tcp-dynamic-pacing-ssthresh.dat", std::ios::out);
+ ssThreshStream << "#Time(s) Slow Start threshold (B)" << std::endl;
+
+ packetTraceStream.open ("tcp-dynamic-pacing-packet-trace.dat", std::ios::out);
+ packetTraceStream << "#Time(s) tx/rx size (B)" << std::endl;
+
+ Simulator::Schedule (MicroSeconds (1001), &ConnectSocketTraces);
+
FlowMonitorHelper flowmon;
Ptr<FlowMonitor> monitor = flowmon.InstallAll ();
NS_LOG_INFO ("Run Simulation.");
- Simulator::Stop (Seconds (5));
+ Simulator::Stop (simulationEndTime);
Simulator::Run ();
monitor->CheckForLostPackets ();
@@ -141,19 +311,19 @@ main (int argc, char *argv[])
for (std::map<FlowId, FlowMonitor::FlowStats>::const_iterator i = stats.begin (); i != stats.end (); ++i)
{
Ipv4FlowClassifier::FiveTuple t = classifier->FindFlow (i->first);
- if (t.sourceAddress == "10.1.1.2")
- {
- continue;
- }
+
std::cout << "Flow " << i->first << " (" << t.sourceAddress << " -> " << t.destinationAddress << ")\n";
std::cout << " Tx Packets: " << i->second.txPackets << "\n";
std::cout << " Tx Bytes: " << i->second.txBytes << "\n";
- std::cout << " TxOffered: " << i->second.txBytes * 8.0 / 9.0 / 1000 / 1000 << " Mbps\n";
+ std::cout << " TxOffered: " << i->second.txBytes * 8.0 / simulationEndTime.GetSeconds () / 1000 / 1000 << " Mbps\n";
std::cout << " Rx Packets: " << i->second.rxPackets << "\n";
std::cout << " Rx Bytes: " << i->second.rxBytes << "\n";
- std::cout << " Throughput: " << i->second.rxBytes * 8.0 / 9.0 / 1000 / 1000 << " Mbps\n";
+ std::cout << " Throughput: " << i->second.rxBytes * 8.0 / simulationEndTime.GetSeconds () / 1000 / 1000 << " Mbps\n";
}
+
+ cwndStream.close ();
+ pacingRateStream.close ();
+ ssThreshStream.close ();
Simulator::Destroy ();
- NS_LOG_INFO ("Done.");
}
diff --git a/src/internet/doc/tcp.rst b/src/internet/doc/tcp.rst
index 09ed0188f..b217760ff 100644
--- a/src/internet/doc/tcp.rst
+++ b/src/internet/doc/tcp.rst
@@ -968,10 +968,11 @@ environment. Some differences were noted:
* Linux maintains its congestion window in segments and not bytes, and
the arithmetic is not floating point, so some differences in the
evolution of congestion window have been observed.
-* Linux uses pacing, while ns-3 currently does not provide a dynamically
- adjusting pacing implementation; segments are sent out at the line rate
- unless the user has enabled pacing and set the maximum pacing rate to
- less than the line rate.
+* Linux uses pacing, where packets to be sent are paced out at regular
+ intervals. However, if at any instant the number of segments that can
+ be sent are less than two, Linux does not pace them and instead sends
+ them back-to-back. Currently, ns-3 paces out all packets eligible to
+ be sent in the same manner.
* Linux implements a state called 'Congestion Window Reduced' (CWR)
immediately following a cwnd reduction, and performs proportional rate
reduction similar to how a fast retransmit event is handled. During
@@ -1150,6 +1151,81 @@ The following issues are yet to be addressed:
outgoing TCP sessions (e.g. a TCP may perform ECN echoing but not set the
ECT codepoints on its outbound data segments).
+Support for Dynamic Pacing
+++++++++++++++++++++++++++
+
+TCP pacing refers to the sender-side practice of scheduling the transmission
+of a burst of eligible TCP segments across a time interval such as
+a TCP RTT, to avoid or reduce bursts. Historically,
+TCP used the natural ACK clocking mechanism to pace segments, but some
+network paths introduce aggregation (bursts of ACKs arriving) or ACK
+thinning, either of which disrupts ACK clocking.
+Some latency-sensitive congestion controls under development (Prague, BBR)
+require pacing to operate effectively.
+
+Until recently, the state of the art in Linux was to support pacing in one
+of two ways:
+
+1) fq/pacing with sch_fq
+2) TCP internal pacing
+
+The presentation by Dumazet and Cheng at IETF 88 summarizes:
+https://www.ietf.org/proceedings/88/slides/slides-88-tcpm-9.pdf
+
+The first option was most often used when offloading (TSO) was enabled and
+when the sch_fq scheduler was used at the traffic control (qdisc) sublayer. In
+this case, TCP was responsible for setting the socket pacing rate, but
+the qdisc sublayer would enforce it. When TSO was enabled, the kernel
+would break a large burst into smaller chunks, with dynamic sizing based
+on the pacing rate, and hand off the segments to the fq qdisc for
+pacing.
+
+The second option was used if sch_fq was not enabled; TCP would be
+responsible for internally pacing.
+
+In 2018, Linux switched to an Early Departure Model (EDM): https://lwn.net/Articles/766564/.
+
+TCP pacing in Linux was added in kernel 3.12, and authors chose to allow
+a pacing rate of 200% against the current rate, to allow probing for
+optimal throughput even during slow start phase. Some refinements were
+added in https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=43e122b014c9,
+in which Google reported that it was better to apply
+a different ratio (120%) in Congestion Avoidance phase. Furthermore,
+authors found that after cwnd reduction, it was helpful to become more
+conservative and switch to the conservative ratio (120%) as soon as
+cwnd >= ssthresh/2, as the initial ramp up (when ssthresh is infinite) still
+allows doubling cwnd every other RTT. Linux also does not pace the initial
+window (IW), typically 10 segments in practice.
+
+Linux has also been observed to not pace if the number of eligible segments
+to be sent is exactly two; they will be sent back to back. If three or
+more, the first two are sent immediately, and additional segments are paced
+at the current pacing rate.
+
+In ns-3, the model is as follows. There is no TSO/sch_fq model; only
+internal pacing according to current Linux policy.
+
+Pacing may be enabled for any TCP congestion control, and a maximum
+pacing rate can be set. Furthermore, dynamic pacing is enabled for
+all TCP variants, according to the following guidelines.
+
+* Pacing of the initial window (IW) is not done by default but can be
+ separately enabled.
+
+* Pacing of the initial slow start, after IW, is done according to the
+ pacing rate of 200% of the current rate, to allow for window growth.
+ This pacing rate can be configured to a different value than 200%.
+
+* Pacing of congestion avoidance phase is done at a pacing rate of 120% of
+ current rate. This can be configured to a different value than 120%.
+
+* Pacing of subsequent slow start is done according to the following
+ heuristic. If cwnd < ssthresh/2, such as after a timeout or idle period,
+ pace at the slow start rate (200%). Otherwise, pace at the congestion
+ avoidance rate.
+
+Dynamic pacing is demonstrated by the example program ``examples/tcp/tcp-pacing.cc``.
+
Validation
++++++++++
@@ -1188,6 +1264,7 @@ section below on :ref:`Writing-tcp-tests`.
* **tcp-zero-window-test:** Unit test persist behavior for zero window conditions
* **tcp-close-test:** Unit test on the socket closing: both receiver and sender have to close their socket when all bytes are transferred
* **tcp-ecn-test:** Unit tests on Explicit Congestion Notification
+* **tcp-pacing-test:** Unit tests on dynamic TCP pacing rate
Several tests have dependencies outside of the ``internet`` module, so they
are located in a system test directory called ``src/test/ns3tcp``. Three
diff --git a/src/internet/model/tcp-socket-base.cc b/src/internet/model/tcp-socket-base.cc
index 7900253f9..ceae971e2 100644
--- a/src/internet/model/tcp-socket-base.cc
+++ b/src/internet/model/tcp-socket-base.cc
@@ -196,6 +196,10 @@ TcpSocketBase::GetTypeId (void)
"Highest ack received from peer",
MakeTraceSourceAccessor (&TcpSocketBase::m_highRxAckMark),
"ns3::TracedValueCallback::SequenceNumber32")
+ .AddTraceSource ("PacingRate",
+ "The current TCP pacing rate",
+ MakeTraceSourceAccessor (&TcpSocketBase::m_pacingRateTrace),
+ "ns3::TracedValueCallback::DataRate")
.AddTraceSource ("CongestionWindow",
"The TCP connection's congestion window",
MakeTraceSourceAccessor (&TcpSocketBase::m_cWndTrace),
@@ -249,13 +253,17 @@ TcpSocketBase::TcpSocketBase (void)
m_tcb->m_rxBuffer = CreateObject<TcpRxBuffer> ();
- m_tcb->m_currentPacingRate = m_tcb->m_maxPacingRate;
+ m_tcb->m_pacingRate = m_tcb->m_maxPacingRate;
m_pacingTimer.SetFunction (&TcpSocketBase::NotifyPacingPerformed, this);
m_tcb->m_sendEmptyPacketCallback = MakeCallback (&TcpSocketBase::SendEmptyPacket, this);
bool ok;
+ ok = m_tcb->TraceConnectWithoutContext ("PacingRate",
+ MakeCallback (&TcpSocketBase::UpdatePacingRateTrace, this));
+ NS_ASSERT (ok == true);
+
ok = m_tcb->TraceConnectWithoutContext ("CongestionWindow",
MakeCallback (&TcpSocketBase::UpdateCwnd, this));
NS_ASSERT (ok == true);
@@ -365,7 +373,7 @@ TcpSocketBase::TcpSocketBase (const TcpSocketBase& sock)
m_tcb = CopyObject (sock.m_tcb);
m_tcb->m_rxBuffer = CopyObject (sock.m_tcb->m_rxBuffer);
- m_tcb->m_currentPacingRate = m_tcb->m_maxPacingRate;
+ m_tcb->m_pacingRate = m_tcb->m_maxPacingRate;
m_pacingTimer.SetFunction (&TcpSocketBase::NotifyPacingPerformed, this);
if (sock.m_congestionControl)
@@ -387,6 +395,9 @@ TcpSocketBase::TcpSocketBase (const TcpSocketBase& sock)
bool ok;
+ ok = m_tcb->TraceConnectWithoutContext ("PacingRate",
+ MakeCallback (&TcpSocketBase::UpdatePacingRateTrace, this));
+
ok = m_tcb->TraceConnectWithoutContext ("CongestionWindow",
MakeCallback (&TcpSocketBase::UpdateCwnd, this));
NS_ASSERT (ok == true);
@@ -2092,6 +2103,11 @@ TcpSocketBase::ProcessAck(const SequenceNumber32 &ackNumber, bool scoreboardUpda
}
}
}
+ // Update the pacing rate, since m_congestionControl->IncreaseWindow() or
+ // m_congestionControl->PktsAcked () may change m_tcb->m_cWnd
+ // Make sure that control reaches the end of this function and there is no
+ // return in between
+ UpdatePacingRate ();
}
/* Received a packet upon LISTEN state. */
@@ -2183,6 +2199,8 @@ TcpSocketBase::ProcessSynSent (Ptr<Packet> packet, const TcpHeader& tcpHeader)
m_tcb->m_rxBuffer->SetNextRxSequence (tcpHeader.GetSequenceNumber () + SequenceNumber32 (1));
m_tcb->m_highTxMark = ++m_tcb->m_nextTxSequence;
m_txBuffer->SetHeadSequence (m_tcb->m_nextTxSequence);
+ // Before sending packets, update the pacing rate based on RTT measurement so far
+ UpdatePacingRate ();
SendEmptyPacket (TcpHeader::ACK);
/* Check if we received an ECN SYN-ACK packet. Change the ECN state of sender to ECN_IDLE if receiver has sent an ECN SYN-ACK
@@ -2256,6 +2274,8 @@ TcpSocketBase::ProcessSynRcvd (Ptr<Packet> packet, const TcpHeader& tcpHeader,
m_delAckCount = m_delAckMaxCount;
NotifyNewConnectionCreated (this, fromAddress);
ReceivedAck (packet, tcpHeader);
+ // Update the pacing rate based on RTT measurement so far
+ UpdatePacingRate ();
// As this connection is established, the socket is available to send data now
if (GetTxAvailable () > 0)
{
@@ -2967,20 +2987,24 @@ TcpSocketBase::SendDataPacket (SequenceNumber32 seq, uint32_t maxSize, bool with
// peer when it is not retransmission.
NS_ASSERT (isRetransmission || ((m_highRxAckMark + SequenceNumber32 (m_rWnd)) >= (seq + SequenceNumber32 (maxSize))));
- if (m_tcb->m_pacing)
+ if (IsPacingEnabled ())
{
NS_LOG_INFO ("Pacing is enabled");
if (m_pacingTimer.IsExpired ())
{
- NS_LOG_DEBUG ("Current Pacing Rate " << m_tcb->m_currentPacingRate);
- NS_LOG_DEBUG ("Timer is in expired state, activate it " << m_tcb->m_currentPacingRate.CalculateBytesTxTime (sz));
- m_pacingTimer.Schedule (m_tcb->m_currentPacingRate.CalculateBytesTxTime (sz));
+ NS_LOG_DEBUG ("Current Pacing Rate " << m_tcb->m_pacingRate);
+ NS_LOG_DEBUG ("Timer is in expired state, activate it " << m_tcb->m_pacingRate.Get ().CalculateBytesTxTime (sz));
+ m_pacingTimer.Schedule (m_tcb->m_pacingRate.Get ().CalculateBytesTxTime (sz));
}
else
{
NS_LOG_INFO ("Timer is already in running state");
}
}
+ else
+ {
+ NS_LOG_INFO ("Pacing is disabled");
+ }
if (withAck)
{
@@ -2995,6 +3019,7 @@ TcpSocketBase::SendDataPacket (SequenceNumber32 seq, uint32_t maxSize, bool with
m_congestionControl->ReduceCwnd (m_tcb);
m_tcb->m_ssThresh = m_tcb->m_cWnd;
m_tcb->m_cWndInfl = m_tcb->m_cWnd;
+ UpdatePacingRate ();
flags |= TcpHeader::CWR;
m_ecnCWRSeq = seq;
NS_LOG_DEBUG (TcpSocketState::EcnStateName[m_tcb->m_ecnState] << " -> ECN_CWR_SENT");
@@ -3133,7 +3158,7 @@ TcpSocketBase::SendPendingData (bool withAck)
// else branch to control silly window syndrome and Nagle)
while (availableWindow > 0)
{
- if (m_tcb->m_pacing)
+ if (IsPacingEnabled ())
{
NS_LOG_INFO ("Pacing is enabled");
if (m_pacingTimer.IsRunning ())
@@ -3228,14 +3253,14 @@ TcpSocketBase::SendPendingData (bool withAck)
" size " << sz);
m_tcb->m_nextTxSequence += sz;
++nPacketsSent;
- if (m_tcb->m_pacing)
+ if (IsPacingEnabled ())
{
NS_LOG_INFO ("Pacing is enabled");
if (m_pacingTimer.IsExpired ())
{
- NS_LOG_DEBUG ("Current Pacing Rate " << m_tcb->m_currentPacingRate);
- NS_LOG_DEBUG ("Timer is in expired state, activate it " << m_tcb->m_currentPacingRate.CalculateBytesTxTime (sz));
- m_pacingTimer.Schedule (m_tcb->m_currentPacingRate.CalculateBytesTxTime (sz));
+ NS_LOG_DEBUG ("Current Pacing Rate " << m_tcb->m_pacingRate);
+ NS_LOG_DEBUG ("Timer is in expired state, activate it " << m_tcb->m_pacingRate.Get ().CalculateBytesTxTime (sz));
+ m_pacingTimer.Schedule (m_tcb->m_pacingRate.Get ().CalculateBytesTxTime (sz));
break;
}
}
@@ -4265,6 +4290,12 @@ TcpSocketBase::SetRetxThresh (uint32_t retxThresh)
m_txBuffer->SetDupAckThresh (retxThresh);
}
+void
+TcpSocketBase::UpdatePacingRateTrace (DataRate oldValue, DataRate newValue)
+{
+ m_pacingRateTrace (oldValue, newValue);
+}
+
void
TcpSocketBase::UpdateCwnd (uint32_t oldValue, uint32_t newValue)
{
@@ -4363,6 +4394,89 @@ TcpSocketBase::NotifyPacingPerformed (void)
SendPendingData (m_connected);
}
+bool
+TcpSocketBase::IsPacingEnabled (void) const
+{
+ if (!m_tcb->m_pacing)
+ {
+ return false;
+ }
+ else
+ {
+ if (m_tcb->m_paceInitialWindow)
+ {
+ return true;
+ }
+ SequenceNumber32 highTxMark = m_tcb->m_highTxMark; // cast traced value
+ if (highTxMark.GetValue () > (GetInitialCwnd () * m_tcb->m_segmentSize))
+ {
+ return true;
+ }
+ }
+ return false;
+}
+
+void
+TcpSocketBase::UpdatePacingRate (void)
+{
+ NS_LOG_FUNCTION (this << m_tcb);
+
+ // According to Linux, set base pacing rate to (cwnd * mss) / srtt
+ //
+ // In (early) slow start, multiply base by the slow start factor.
+ // In late slow start and congestion avoidance, multiply base by
+ // the congestion avoidance factor.
+ // Comment from Linux code regarding early/late slow start:
+ // Normal Slow Start condition is (tp->snd_cwnd < tp->snd_ssthresh)
+ // If snd_cwnd >= (tp->snd_ssthresh / 2), we are approaching
+ // end of slow start and should slow down.
+
+ // Similar to Linux, do not update pacing rate here if the
+ // congestion control implements TcpCongestionOps::CongControl ()
+ if (m_congestionControl->HasCongControl () || !m_tcb->m_pacing) return;
+
+ double factor;
+ if (m_tcb->m_cWnd < m_tcb->m_ssThresh/2)
+ {
+ NS_LOG_DEBUG ("Pacing according to slow start factor; " << m_tcb->m_cWnd << " " << m_tcb->m_ssThresh);
+ factor = static_cast (m_tcb->m_pacingSsRatio)/100;
+ }
+ else
+ {
+ NS_LOG_DEBUG ("Pacing according to congestion avoidance factor; " << m_tcb->m_cWnd << " " << m_tcb->m_ssThresh);
+ factor = static_cast (m_tcb->m_pacingCaRatio)/100;
+ }
+ Time lastRtt = m_tcb->m_lastRtt.Get (); // Get underlying Time value
+ NS_LOG_DEBUG ("Last RTT is " << lastRtt.GetSeconds ());
+
+ // Multiply by 8 to convert from bytes per second to bits per second
+ DataRate pacingRate ((std::max (m_tcb->m_cWnd, m_tcb->m_bytesInFlight) * 8 * factor) / lastRtt.GetSeconds ());
+ if (pacingRate < m_tcb->m_maxPacingRate)
+ {
+ NS_LOG_DEBUG ("Pacing rate updated to: " << pacingRate);
+ m_tcb->m_pacingRate = pacingRate;
+ }
+ else
+ {
+ NS_LOG_DEBUG ("Pacing capped by max pacing rate: " << m_tcb->m_maxPacingRate);
+ m_tcb->m_pacingRate = m_tcb->m_maxPacingRate;
+ }
+}
+
+void
+TcpSocketBase::SetPacingStatus (bool pacing)
+{
+ NS_LOG_FUNCTION (this << pacing);
+ m_tcb->m_pacing = pacing;
+}
+
+void
+TcpSocketBase::SetPaceInitialWindow (bool paceWindow)
+{
+ NS_LOG_FUNCTION (this << paceWindow);
+ m_tcb->m_paceInitialWindow = paceWindow;
+}
+
void
TcpSocketBase::SetUseEcn (TcpSocketState::UseEcn_t useEcn)
{
diff --git a/src/internet/model/tcp-socket-base.h b/src/internet/model/tcp-socket-base.h
index ef7a53674..6a4edc78b 100644
--- a/src/internet/model/tcp-socket-base.h
+++ b/src/internet/model/tcp-socket-base.h
@@ -317,6 +317,11 @@ public:
*/
uint32_t GetRetxThresh (void) const { return m_retxThresh; }
+ /**
+ * \brief Callback pointer for pacing rate trace chaining
+ */
+ TracedCallback<DataRate, DataRate> m_pacingRateTrace;
+
/**
* \brief Callback pointer for cWnd trace chaining
*/
@@ -362,6 +367,13 @@ public:
*/
TracedCallback