bug 826: optimize hp implementation for 32bit systems

This commit is contained in:
Mathieu Lacage
2010-07-05 10:15:42 +02:00
parent 895f08bda3
commit 698c8b1e2a
2 changed files with 71 additions and 274 deletions

View File

@@ -25,137 +25,39 @@
namespace ns3 {
#ifdef GATHER_STATISTICS
// Definitions of the static statistics counters of HighPrecision, all
// starting at zero. PrintStats reports each fast/slow pair as a total and
// as a fast-path ratio; m_ndivs and m_nconversions are printed as-is.
int HighPrecision::m_nfastadds = 0;
int HighPrecision::m_nfastsubs = 0;
int HighPrecision::m_nfastmuls = 0;
int HighPrecision::m_nfastcmps = 0;
int HighPrecision::m_nfastgets = 0;
int HighPrecision::m_nslowadds = 0;
int HighPrecision::m_nslowsubs = 0;
int HighPrecision::m_nslowmuls = 0;
int HighPrecision::m_nslowcmps = 0;
int HighPrecision::m_nslowgets = 0;
int HighPrecision::m_ndivs = 0;
int HighPrecision::m_nconversions = 0;
/**
 * Dump the collected operation statistics to std::cout.
 *
 * For each of add/sub/cmp/mul/get the fraction of calls that took the
 * fast path is printed first, followed by the total number of calls per
 * operation, the number of divisions and the number of conversions.
 * A ratio whose total is zero is the result of a floating-point 0/0.
 */
void
HighPrecision::PrintStats (void)
{
  // Per-operation totals; the integer sum is stored as a double so that the
  // ratios below are computed in floating point.
  const double addTotal = m_nfastadds + m_nslowadds;
  const double subTotal = m_nfastsubs + m_nslowsubs;
  const double cmpTotal = m_nfastcmps + m_nslowcmps;
  const double mulTotal = m_nfastmuls + m_nslowmuls;
  const double getTotal = m_nfastgets + m_nslowgets;

  // Share of calls served by the fast path.
  std::cout << "add=" << m_nfastadds / addTotal << std::endl;
  std::cout << "sub=" << m_nfastsubs / subTotal << std::endl;
  std::cout << "cmp=" << m_nfastcmps / cmpTotal << std::endl;
  std::cout << "mul=" << m_nfastmuls / mulTotal << std::endl;
  std::cout << "get=" << m_nfastgets / getTotal << std::endl;

  // Absolute counts.
  std::cout << "nadds=" << addTotal << std::endl;
  std::cout << "nsubs=" << subTotal << std::endl;
  std::cout << "ncmps=" << cmpTotal << std::endl;
  std::cout << "nmuls=" << mulTotal << std::endl;
  std::cout << "ngets=" << getTotal << std::endl;
  std::cout << "ndivs=" << m_ndivs << std::endl;
  std::cout << "nconversions=" << m_nconversions << std::endl;
}
#else
// Variant compiled when GATHER_STATISTICS is not defined: no counters
// exist, so there is nothing to print.
void
HighPrecision::PrintStats (void)
{
}
#endif /* GATHER_STATISTICS */
// Scale factor between a fractional double and the low 64 bits of the
// fixed-point value: the fraction is multiplied by MAX_64 when converting
// from double and the low word is divided by it when converting back.
// NOTE(review): the literal is 2^64 - 1, which is not exactly representable
// in a double with a 53-bit mantissa; it presumably rounds to 2^64 — confirm.
const double HighPrecision::MAX_64 = 18446744073709551615.0;
HighPrecision::HighPrecision (double value)
double HighPrecision::GetDouble (void) const
{
int64_t hi = (int64_t) floor (value);
uint64_t lo = (uint64_t) ((value - floor (value)) * MAX_64);
if (lo == 0)
{
m_isFast = true;
m_fastValue = hi;
return;
}
else
{
m_isFast = false;
m_slowValue = _cairo_int64_to_int128 (hi);
m_slowValue = _cairo_int128_lsl (m_slowValue, 64);
cairo_int128_t clo = _cairo_uint128_to_int128 (_cairo_uint64_to_uint128 (lo));
m_slowValue = _cairo_int128_add (m_slowValue, clo);
}
}
/**
 * Switch this object to the 128-bit representation if it is still using
 * the 64-bit fast value; does nothing when it is already slow.
 */
void
HighPrecision::EnsureSlow (void)
{
  if (!m_isFast)
    {
      // Already in the 128-bit representation.
      return;
    }
  HP128INC (m_nconversions++);
  // Widen the fast integer and move it into the upper 64 bits.
  m_slowValue = _cairo_int128_lsl (_cairo_int64_to_int128 (m_fastValue), 64);
  m_isFast = false;
}
int64_t
HighPrecision::SlowGetInteger (void) const
{
cairo_int128_t value = _cairo_int128_rsa (m_slowValue, 64);
return _cairo_int128_to_int64 (value);
}
double
HighPrecision::SlowGetDouble (void) const
{
bool is_negative = _cairo_int128_negative (m_slowValue);
cairo_int128_t value = is_negative ? _cairo_int128_negate (m_slowValue) : m_slowValue;
bool is_negative = _cairo_int128_negative (m_value);
cairo_int128_t value = is_negative ? _cairo_int128_negate (m_value) : m_value;
cairo_int128_t hi = _cairo_int128_rsa (value, 64);
cairo_uint128_t lo = _cairo_int128_sub (value, _cairo_uint128_lsl (hi, 64));
double flo = _cairo_uint128_to_uint64 (lo);
flo /= MAX_64;
double retval = _cairo_uint128_to_uint64 (hi);
retval += flo;
retval *= is_negative ? -1.0 : 1.0;
retval = is_negative ? -retval: retval;
return retval;
}
bool
HighPrecision::SlowAdd (HighPrecision const &o)
void
HighPrecision::Mul (HighPrecision const &o)
{
EnsureSlow ();
const_cast<HighPrecision &> (o).EnsureSlow ();
m_slowValue = _cairo_int128_add (m_slowValue, o.m_slowValue);
return false;
}
bool
HighPrecision::SlowSub (HighPrecision const &o)
{
EnsureSlow ();
const_cast<HighPrecision &> (o).EnsureSlow ();
m_slowValue = _cairo_int128_sub (m_slowValue, o.m_slowValue);
return false;
}
bool
HighPrecision::SlowMul (HighPrecision const &o)
{
EnsureSlow ();
const_cast<HighPrecision &> (o).EnsureSlow ();
// use the 128 bits multiplication
m_slowValue = Mul128 (m_slowValue,o.m_slowValue);
return false;
m_value = Mul128 (m_value,o.m_value);
}
/**
 * Build the 128-bit fixed-point value from a double.
 *
 * floor (value) is stored in the upper 64 bits and the remainder
 * (value - floor (value)), scaled by MAX_64, in the lower 64 bits.
 *
 * \param value the number to convert. NOTE(review): values whose floor
 *        does not fit in an int64_t, and NaN, are not handled here.
 */
HighPrecision::HighPrecision (double value)
{
  const double integral = floor (value);
  int64_t hi = (int64_t) integral;
  // Remainder scaled to the range of the low 64 bits.
  const double scaled = (value - integral) * MAX_64;
  uint64_t lo;
  if (scaled >= MAX_64)
    {
      // For tiny negative inputs (e.g. -1e-20) the remainder rounds up to
      // 1.0, so the scaled value no longer fits in 64 bits and converting
      // it to uint64_t would be undefined behaviour. Carry the overflow
      // into the integer part instead.
      hi += 1;
      lo = 0;
    }
  else
    {
      lo = (uint64_t) scaled;
    }
  // Integer part in the upper 64 bits, scaled remainder added below it.
  m_value = _cairo_int64_to_int128 (hi);
  m_value = _cairo_int128_lsl (m_value, 64);
  cairo_int128_t clo = _cairo_uint128_to_int128 (_cairo_uint64_to_uint128 (lo));
  m_value = _cairo_int128_add (m_value, clo);
}
/**
* this function multiplies two 128 bits fractions considering
* the high 64 bits as the integer part and the low 64 bits
@@ -163,7 +65,7 @@ HighPrecision::SlowMul (HighPrecision const &o)
* of the operands to produce a signed 128 bits result.
*/
cairo_int128_t
HighPrecision::Mul128 (cairo_int128_t sa, cairo_int128_t sb )
HighPrecision::Mul128 (cairo_int128_t sa, cairo_int128_t sb ) const
{
bool negResult, negA, negB;
@@ -205,20 +107,15 @@ HighPrecision::Mul128 (cairo_int128_t sa, cairo_int128_t sb )
return _cairo_uint128_to_int128 (result);
}
bool
void
HighPrecision::Div (HighPrecision const &o)
{
HP128INC (m_ndivs++);
EnsureSlow ();
const_cast<HighPrecision &> (o).EnsureSlow ();
cairo_int128_t result = Div128 (m_slowValue, o.m_slowValue);
m_slowValue = result;
return false;
cairo_int128_t result = Div128 (m_value, o.m_value);
m_value = result;
}
cairo_int128_t
HighPrecision::Div128 (cairo_int128_t sa, cairo_int128_t sb)
HighPrecision::Div128 (cairo_int128_t sa, cairo_int128_t sb) const
{
bool negResult, negA, negB;
// take the sign of the operands
@@ -254,24 +151,6 @@ HighPrecision::Div128 (cairo_int128_t sa, cairo_int128_t sb)
result = negResult ? _cairo_uint128_negate (result) : result;
return _cairo_uint128_to_int128 (result);
}
/**
 * Three-way comparison on the 128-bit representation.
 *
 * Both operands are first converted to the slow representation, which is
 * why constness is cast away on this object and on \p o.
 *
 * \param o the value to compare against
 * \return -1 if this value is smaller than \p o, 0 if both are equal,
 *         1 otherwise.
 */
int
HighPrecision::SlowCompare (HighPrecision const &o) const
{
  const_cast<HighPrecision *> (this)->EnsureSlow ();
  const_cast<HighPrecision &> (o).EnsureSlow ();
  if (_cairo_int128_lt (m_slowValue, o.m_slowValue))
    {
      return -1;
    }
  return _cairo_int128_eq (m_slowValue, o.m_slowValue) ? 0 : 1;
}
} // namespace ns3

View File

@@ -69,15 +69,6 @@
*
*/
// Only noGATHER_STATISTICS is defined here, so unless GATHER_STATISTICS is
// defined elsewhere the statistics code guarded by it is compiled out.
// Presumably the "no" prefix is dropped by hand to enable it — confirm.
#define noGATHER_STATISTICS 1
#ifdef GATHER_STATISTICS
// Post-increments the counter expression passed as x.
// NOTE(review): some call sites pass an argument that already ends in ++
// (e.g. HP128INC (m_ndivs++)), which would expand to m_ndivs++++ — confirm
// before enabling statistics.
#define HP128INC(x) x++
#else
// Statistics disabled: the macro expands to nothing.
#define HP128INC(x)
#endif
namespace ns3 {
class HighPrecision
@@ -87,167 +78,95 @@ public:
inline HighPrecision (int64_t value, bool dummy);
HighPrecision (double value);
static void PrintStats (void);
inline int64_t GetInteger (void) const;
inline double GetDouble (void) const;
inline bool Add (HighPrecision const &o);
inline bool Sub (HighPrecision const &o);
inline bool Mul (HighPrecision const &o);
bool Div (HighPrecision const &o);
double GetDouble (void) const;
inline void Add (HighPrecision const &o);
inline void Sub (HighPrecision const &o);
void Mul (HighPrecision const &o);
void Div (HighPrecision const &o);
inline int Compare (HighPrecision const &o) const;
inline static HighPrecision Zero (void);
private:
int64_t SlowGetInteger (void) const;
double SlowGetDouble (void) const;
bool SlowAdd (HighPrecision const &o);
bool SlowSub (HighPrecision const &o);
bool SlowMul (HighPrecision const &o);
int SlowCompare (HighPrecision const &o) const;
cairo_uint128_t Mul128 (cairo_uint128_t, cairo_uint128_t );
cairo_int128_t Div128 (cairo_int128_t sa, cairo_int128_t sb);
inline void EnsureSlow (void);
cairo_uint128_t Mul128 (cairo_uint128_t, cairo_uint128_t ) const;
cairo_int128_t Div128 (cairo_int128_t sa, cairo_int128_t sb) const;
inline bool IsNegative (void) const;
static const double MAX_64;
bool m_isFast;
int64_t m_fastValue;
cairo_int128_t m_slowValue;
#ifdef GATHER_STATISTICS
static int m_nfastadds;
static int m_nfastsubs;
static int m_nfastmuls;
static int m_nfastcmps;
static int m_nfastgets;
static int m_nslowadds;
static int m_nslowsubs;
static int m_nslowmuls;
static int m_nslowcmps;
static int m_nslowgets;
static int m_ndivs;
static int m_nconversions;
#endif /* GATHER_STATISTICS */
cairo_int128_t m_value;
};
}; // namespace ns3
} // namespace ns3
namespace ns3 {
HighPrecision::HighPrecision ()
: m_isFast (true),
m_fastValue (0)
{
m_value.hi = 0;
m_value.lo = 0;
}
HighPrecision::HighPrecision (int64_t value, bool dummy)
: m_isFast (true),
m_fastValue (value)
{
m_value.hi = value;
m_value.lo = 0;
}
/**
 * \return true when the upper 64 bits of the value, read as a signed
 *         64-bit integer, are below zero.
 */
bool
HighPrecision::IsNegative (void) const
{
  return static_cast<int64_t> (m_value.hi) < 0;
}
int64_t
HighPrecision::GetInteger (void) const
{
if (m_isFast)
{
HP128INC (m_nfastgets);
return m_fastValue;
}
else
{
HP128INC (m_nslowgets);
return SlowGetInteger ();
}
return m_value.hi;
}
/**
 * \return the value as a double: the fast 64-bit integer converted
 *         directly when the object is in fast mode, the result of
 *         SlowGetDouble otherwise.
 */
double HighPrecision::GetDouble (void) const
{
  if (!m_isFast)
    {
      HP128INC (m_nslowgets);
      return SlowGetDouble ();
    }
  HP128INC (m_nfastgets);
  return m_fastValue;
}
bool
void
HighPrecision::Add (HighPrecision const &o)
{
if (m_isFast && o.m_isFast)
m_value.hi += o.m_value.hi;
m_value.lo += o.m_value.lo;
if (m_value.lo < o.m_value.lo)
{
HP128INC (m_nfastadds);
m_fastValue += o.m_fastValue;
return false;
}
else
{
HP128INC (m_nslowadds);
return SlowAdd (o);
m_value.hi++;
}
}
bool
void
HighPrecision::Sub (HighPrecision const &o)
{
if (m_isFast && o.m_isFast)
m_value.hi -= o.m_value.hi;
m_value.lo -= o.m_value.lo;
if (m_value.lo > o.m_value.lo)
{
HP128INC (m_nfastsubs);
m_fastValue -= o.m_fastValue;
return false;
}
else
{
HP128INC (m_nslowsubs);
return SlowSub (o);
m_value.hi--;
}
}
/**
 * Multiply this value by \p o in place.
 *
 * When both operands are in fast mode the 64-bit integers are multiplied
 * directly; otherwise the work is delegated to SlowMul.
 *
 * \param o the multiplier
 * \return false on the fast path, the result of SlowMul otherwise.
 */
bool
HighPrecision::Mul (HighPrecision const &o)
{
  if (!(m_isFast && o.m_isFast))
    {
      HP128INC (m_nslowmuls);
      return SlowMul (o);
    }
  HP128INC (m_nfastmuls);
  m_fastValue *= o.m_fastValue;
  return false;
}
/**
 * Three-way comparison of two values.
 *
 * \param o the value to compare against
 * \return -1 if this value is smaller than \p o, 0 if both are equal,
 *         1 if it is larger.
 */
int
HighPrecision::Compare (HighPrecision const &o) const
{
  const bool selfNegative = IsNegative ();
  const bool otherNegative = o.IsNegative ();
  if (selfNegative != otherNegative)
    {
      // Different signs: the negative operand is the smaller one.
      return selfNegative ? -1 : 1;
    }
  // Same sign: order by the high word, then by the low word.
  if (m_value.hi != o.m_value.hi)
    {
      return (m_value.hi < o.m_value.hi) ? -1 : 1;
    }
  if (m_value.lo != o.m_value.lo)
    {
      // Previously the bool (lo < o.lo) was returned here, yielding +1 for
      // "smaller" and 0 for "larger".
      return (m_value.lo < o.m_value.lo) ? -1 : 1;
    }
  return 0;
}
HighPrecision
HighPrecision::Zero (void)
@@ -255,7 +174,6 @@ HighPrecision::Zero (void)
return HighPrecision ();
}
}; // namespace ns3
} // namespace ns3
#endif /* HIGH_PRECISION_CAIRO_H */