From 698c8b1e2ada9ee68ed59f916b80db0ef1eb8bc3 Mon Sep 17 00:00:00 2001
From: Mathieu Lacage <mathieu.lacage@sophia.inria.fr>
Date: Mon, 5 Jul 2010 10:15:42 +0200
Subject: [PATCH] bug 826: optimize hp implementation for 32bit systems

---
 src/simulator/high-precision-cairo.cc | 169 ++++---------------------
 src/simulator/high-precision-cairo.h  | 176 +++++++-------------------
 2 files changed, 71 insertions(+), 274 deletions(-)

diff --git a/src/simulator/high-precision-cairo.cc b/src/simulator/high-precision-cairo.cc
index d1939901e..45f882b07 100644
--- a/src/simulator/high-precision-cairo.cc
+++ b/src/simulator/high-precision-cairo.cc
@@ -25,137 +25,39 @@
 
 namespace ns3 {
 
-#ifdef GATHER_STATISTICS
-int HighPrecision::m_nfastadds = 0;
-int HighPrecision::m_nfastsubs = 0;
-int HighPrecision::m_nfastmuls = 0;
-int HighPrecision::m_nfastcmps = 0;
-int HighPrecision::m_nfastgets = 0;
-int HighPrecision::m_nslowadds = 0;
-int HighPrecision::m_nslowsubs = 0;
-int HighPrecision::m_nslowmuls = 0;
-int HighPrecision::m_nslowcmps = 0;
-int HighPrecision::m_nslowgets = 0;
-int HighPrecision::m_ndivs = 0;
-int HighPrecision::m_nconversions = 0;
-
-void
-HighPrecision::PrintStats (void)
-{
-  double nadds = m_nfastadds + m_nslowadds;
-  double nsubs = m_nfastsubs + m_nslowsubs;
-  double ncmps = m_nfastcmps + m_nslowcmps;
-  double nmuls = m_nfastmuls + m_nslowmuls;
-  double ngets = m_nfastgets + m_nslowgets;
-  double fast_add_ratio = m_nfastadds / nadds;
-  double fast_sub_ratio = m_nfastsubs / nsubs;
-  double fast_cmp_ratio = m_nfastcmps / ncmps;
-  double fast_mul_ratio = m_nfastmuls / nmuls;
-  double fast_get_ratio = m_nfastgets / ngets;
-
-  std::cout << "add=" << fast_add_ratio << std::endl
-            << "sub=" << fast_sub_ratio << std::endl
-            << "cmp=" << fast_cmp_ratio << std::endl
-            << "mul=" << fast_mul_ratio << std::endl
-            << "get=" << fast_get_ratio << std::endl
-            << "nadds=" << nadds << std::endl
-            << "nsubs=" << nsubs << std::endl
-            << "ncmps=" << ncmps << std::endl
-            << "nmuls=" << nmuls << std::endl
-            << "ngets=" << ngets << std::endl
-            << "ndivs=" << m_ndivs << std::endl
-            << "nconversions=" << m_nconversions << std::endl
-  ;
-}
-#else
-void
-HighPrecision::PrintStats (void)
-{
-}
-#endif /* GATHER_STATISTICS */
-
-
 const double HighPrecision::MAX_64 = 18446744073709551615.0;
 
-
-HighPrecision::HighPrecision (double value)
+double HighPrecision::GetDouble (void) const
 {
-  int64_t hi = (int64_t) floor (value);
-  uint64_t lo = (uint64_t) ((value - floor (value)) * MAX_64);
-  if (lo == 0)
-    {
-      m_isFast = true;
-      m_fastValue = hi;
-      return;
-    }
-  else
-    {
-      m_isFast = false;
-      m_slowValue = _cairo_int64_to_int128 (hi);
-      m_slowValue = _cairo_int128_lsl (m_slowValue, 64);
-      cairo_int128_t clo = _cairo_uint128_to_int128 (_cairo_uint64_to_uint128 (lo));
-      m_slowValue = _cairo_int128_add (m_slowValue, clo);
-    }
-}
-
-void
-HighPrecision::EnsureSlow (void)
-{
-  if (m_isFast)
-    {
-      HP128INC (m_nconversions++);
-      m_slowValue = _cairo_int64_to_int128 (m_fastValue);
-      m_slowValue = _cairo_int128_lsl (m_slowValue, 64);
-      m_isFast = false;
-    }
-}
-
-int64_t
-HighPrecision::SlowGetInteger (void) const
-{
-  cairo_int128_t value = _cairo_int128_rsa (m_slowValue, 64);
-  return _cairo_int128_to_int64 (value);
-}
-
-double
-HighPrecision::SlowGetDouble (void) const
-{
-  bool is_negative = _cairo_int128_negative (m_slowValue);
-  cairo_int128_t value = is_negative ? _cairo_int128_negate (m_slowValue) : m_slowValue;
+  bool is_negative = _cairo_int128_negative (m_value);
+  cairo_int128_t value = is_negative ? _cairo_int128_negate (m_value) : m_value;
   cairo_int128_t hi = _cairo_int128_rsa (value, 64);
   cairo_uint128_t lo = _cairo_int128_sub (value, _cairo_uint128_lsl (hi, 64));
   double flo = _cairo_uint128_to_uint64 (lo);
   flo /= MAX_64;
   double retval = _cairo_uint128_to_uint64 (hi);
   retval += flo;
-  retval *= is_negative ? -1.0 : 1.0;
+  retval = is_negative ? -retval: retval;
   return retval;
 }
-bool
-HighPrecision::SlowAdd (HighPrecision const &o)
+void
+HighPrecision::Mul (HighPrecision const &o)
 {
-  EnsureSlow ();
-  const_cast<HighPrecision &> (o).EnsureSlow ();
-  m_slowValue = _cairo_int128_add (m_slowValue, o.m_slowValue);
-  return false;
-}
-bool
-HighPrecision::SlowSub (HighPrecision const &o)
-{
-  EnsureSlow ();
-  const_cast<HighPrecision &> (o).EnsureSlow ();
-  m_slowValue = _cairo_int128_sub (m_slowValue, o.m_slowValue);
-  return false;
-}
-bool
-HighPrecision::SlowMul (HighPrecision const &o)
-{
-  EnsureSlow ();
-  const_cast<HighPrecision &> (o).EnsureSlow ();
   // use the 128 bits multiplication
-  m_slowValue = Mul128 (m_slowValue,o.m_slowValue);
-  return false;
+  m_value = Mul128 (m_value,o.m_value);
 }
+
+
+HighPrecision::HighPrecision (double value) // builds the 128-bit value from a double
+{
+  int64_t hi = (int64_t) floor (value); // integer part, rounded towards -infinity
+  uint64_t lo = (uint64_t) ((value - floor (value)) * MAX_64); // fractional part in [0,1) scaled by MAX_64
+  m_value = _cairo_int64_to_int128 (hi);
+  m_value = _cairo_int128_lsl (m_value, 64); // shift the integer part up by 64 bits
+  cairo_int128_t clo = _cairo_uint128_to_int128 (_cairo_uint64_to_uint128 (lo));
+  m_value = _cairo_int128_add (m_value, clo); // add the scaled fraction below the integer part
+}
+
 /**
  * this function multiplies two 128 bits fractions considering
  * the high 64 bits as the integer part and the low 64 bits
@@ -163,7 +65,7 @@ HighPrecision::SlowMul (HighPrecision const &o)
  * of the operands to produce a signed 128 bits result.
  */
 cairo_int128_t
-HighPrecision::Mul128 (cairo_int128_t sa, cairo_int128_t sb )
+HighPrecision::Mul128 (cairo_int128_t sa, cairo_int128_t sb ) const
 {
   bool negResult, negA, negB;
 
@@ -205,20 +107,15 @@ HighPrecision::Mul128 (cairo_int128_t sa, cairo_int128_t sb )
   return _cairo_uint128_to_int128 (result);
 }
 
-bool
+void
 HighPrecision::Div (HighPrecision const &o)
 {
-  HP128INC (m_ndivs++);
-  EnsureSlow ();
-  const_cast<HighPrecision &> (o).EnsureSlow ();
-
-  cairo_int128_t result = Div128 (m_slowValue, o.m_slowValue);
-  m_slowValue = result;
-  return false;
+  cairo_int128_t result = Div128 (m_value, o.m_value); // quotient of this value by o, computed by Div128
+  m_value = result; // the division is applied in place
 }
 
 cairo_int128_t
-HighPrecision::Div128 (cairo_int128_t sa, cairo_int128_t sb)
+HighPrecision::Div128 (cairo_int128_t sa, cairo_int128_t sb) const
 {
   bool negResult, negA, negB;
   // take the sign of the operands
@@ -254,24 +151,6 @@ HighPrecision::Div128 (cairo_int128_t sa, cairo_int128_t sb)
   result = negResult ? _cairo_uint128_negate (result) : result;
   return _cairo_uint128_to_int128 (result);
 }
-int
-HighPrecision::SlowCompare (HighPrecision const &o) const
-{
-  const_cast<HighPrecision *> (this)->EnsureSlow ();
-  const_cast<HighPrecision &> (o).EnsureSlow ();
-  if (_cairo_int128_lt (m_slowValue, o.m_slowValue))
-    {
-      return -1;
-    }
-  else if (_cairo_int128_eq (m_slowValue, o.m_slowValue))
-    {
-      return 0;
-    }
-  else
-    {
-      return 1;
-    }
-}
 
 } // namespace ns3
 
diff --git a/src/simulator/high-precision-cairo.h b/src/simulator/high-precision-cairo.h
index a61f7b878..99a5217ff 100644
--- a/src/simulator/high-precision-cairo.h
+++ b/src/simulator/high-precision-cairo.h
@@ -69,15 +69,6 @@
  *
  */
 
-
-#define noGATHER_STATISTICS 1
-
-#ifdef GATHER_STATISTICS
-#define HP128INC(x) x++
-#else
-#define HP128INC(x)
-#endif
-
 namespace ns3 {
 
 class HighPrecision
@@ -87,167 +78,95 @@ public:
   inline HighPrecision (int64_t value, bool dummy);
   HighPrecision (double value);
 
-  static void PrintStats (void);
-
   inline int64_t GetInteger (void) const;
-  inline double GetDouble (void) const;
-  inline bool Add (HighPrecision const &o);
-  inline bool Sub (HighPrecision const &o);
-  inline bool Mul (HighPrecision const &o);
-  bool Div (HighPrecision const &o);
+  double GetDouble (void) const;
+  inline void Add (HighPrecision const &o);
+  inline void Sub (HighPrecision const &o);
+  void Mul (HighPrecision const &o);
+  void Div (HighPrecision const &o);
 
   inline int Compare (HighPrecision const &o) const;
   inline static HighPrecision Zero (void);
 private:
-  int64_t SlowGetInteger (void) const;
-  double SlowGetDouble (void) const;
-  bool SlowAdd (HighPrecision const &o);
-  bool SlowSub (HighPrecision const &o);
-  bool SlowMul (HighPrecision const &o);
-  int SlowCompare (HighPrecision const &o) const;
-  cairo_uint128_t  Mul128 (cairo_uint128_t, cairo_uint128_t );
-  cairo_int128_t Div128 (cairo_int128_t sa, cairo_int128_t sb);
-  inline void EnsureSlow (void);
+  cairo_uint128_t  Mul128 (cairo_uint128_t, cairo_uint128_t ) const;
+  cairo_int128_t Div128 (cairo_int128_t sa, cairo_int128_t sb) const;
+  inline bool IsNegative (void) const;
 
   static const double MAX_64;
-  bool m_isFast;
-  int64_t m_fastValue;
-  cairo_int128_t m_slowValue;
-
-#ifdef GATHER_STATISTICS
-  static int m_nfastadds;
-  static int m_nfastsubs;
-  static int m_nfastmuls;
-  static int m_nfastcmps;
-  static int m_nfastgets;
-  static int m_nslowadds;
-  static int m_nslowsubs;
-  static int m_nslowmuls;
-  static int m_nslowcmps;
-  static int m_nslowgets;
-  static int m_ndivs;
-  static int m_nconversions;
-#endif /* GATHER_STATISTICS */
+  cairo_int128_t m_value;
 };
 
-}; // namespace ns3
+} // namespace ns3
 
 namespace ns3 {
 
 HighPrecision::HighPrecision ()
-  : m_isFast (true),
-    m_fastValue (0)
 {
+  m_value.hi = 0; // default value is zero: both words are cleared
+  m_value.lo = 0;
 }
 
 HighPrecision::HighPrecision (int64_t value, bool dummy)
-  : m_isFast (true),
-    m_fastValue (value)
 {
+  m_value.hi = value; // the integer goes in the high word; 'dummy' is not read here
+  m_value.lo = 0; // no fractional part
 }
 
+bool 
+HighPrecision::IsNegative (void) const // true when the stored value is below zero
+{
+  int64_t hi = m_value.hi; // view the high word as a signed 64-bit integer
+  return hi < 0; // the sign of the whole value is the sign of its high word
+}
 
 int64_t
 HighPrecision::GetInteger (void) const
 {
-  if (m_isFast)
-    {
-      HP128INC (m_nfastgets);
-      return m_fastValue;
-    }
-  else
-    {
-      HP128INC (m_nslowgets);
-      return SlowGetInteger ();
-    }
+  return m_value.hi; // only the high word is returned; the low word is ignored
 }
-double HighPrecision::GetDouble (void) const
-{
-  if (m_isFast)
-    {
-      HP128INC (m_nfastgets);
-      double retval = m_fastValue;
-      return retval;
-    }
-  else
-    {
-      HP128INC (m_nslowgets);
-      return SlowGetDouble ();
-    }
-}
-bool
+void
 HighPrecision::Add (HighPrecision const &o)
 {
-  if (m_isFast && o.m_isFast)
+  m_value.hi += o.m_value.hi; // high words first; the carry from the low words is applied below
+  if (m_value.lo > ~o.m_value.lo) // carry out of lo (unsigned 64-bit), decided BEFORE lo changes so o may alias *this
     {
-      HP128INC (m_nfastadds);
-      m_fastValue += o.m_fastValue;
-      return false;
-    }
-  else
-    {
-      HP128INC (m_nslowadds);
-      return SlowAdd (o);
+      m_value.hi++;
     }
+  m_value.lo += o.m_value.lo; // low words last, wrapping modulo 2^64
 }
-bool
+void
 HighPrecision::Sub (HighPrecision const &o)
 {
-  if (m_isFast && o.m_isFast)
+  m_value.hi -= o.m_value.hi; // high words first; the borrow from the low words is applied below
+  if (m_value.lo < o.m_value.lo) // borrow needed: must be tested on the ORIGINAL lo, before it is modified
     {
-      HP128INC (m_nfastsubs);
-      m_fastValue -= o.m_fastValue;
-      return false;
-    }
-  else
-    {
-      HP128INC (m_nslowsubs);
-      return SlowSub (o);
+      m_value.hi--;
     }
+  m_value.lo -= o.m_value.lo; // low words last, wrapping modulo 2^64
 }
-bool
-HighPrecision::Mul (HighPrecision const &o)
-{
-  if (m_isFast && o.m_isFast)
-    {
-      HP128INC (m_nfastmuls);
-      m_fastValue *= o.m_fastValue;
-      return false;
-    }
-  else
-    {
-      HP128INC (m_nslowmuls);
-      return SlowMul (o);
-    }
-}
-
 int
 HighPrecision::Compare (HighPrecision const &o) const
 {
-  if (m_isFast && o.m_isFast)
+  if (IsNegative () && !o.IsNegative ()) // signs differ: the negative operand is the smaller one
     {
-      HP128INC (m_nfastcmps);
-      if (m_fastValue < o.m_fastValue)
-        {
-          return -1;
-        }
-      else if (m_fastValue == o.m_fastValue)
-        {
-          return 0;
-        }
-      else
-        {
-          return +1;
-        }
+      return -1;
+    }
+  else if (!IsNegative () && o.IsNegative ())
+    {
+      return 1;
+    }
+  else if (m_value.hi < o.m_value.hi) // same sign from here on: order by the high word first
+    {
+      return -1;
+    }
+  else if (m_value.hi > o.m_value.hi)
+    {
+      return 1;
     }
   else
     {
-      HP128INC (m_nslowcmps);
-      return SlowCompare (o);
+      return (m_value.lo < o.m_value.lo) ? -1 : ((m_value.lo > o.m_value.lo) ? 1 : 0); // hi equal: -1/0/+1 from lo
     }
-  // The below statement is unreachable but necessary for optimized
-  // builds with gcc-4.0.x due to a compiler bug.
-  return 0;
 }
 HighPrecision
 HighPrecision::Zero (void)
@@ -255,7 +174,6 @@ HighPrecision::Zero (void)
   return HighPrecision ();
 }
 
-
-}; // namespace ns3
+} // namespace ns3
 
 #endif /* HIGH_PRECISION_CAIRO_H */