public class JSoftFloat
extends java.lang.Object
Modifier and Type | Field and Description |
---|---|
static byte |
float_exception_flags |
static long |
float128_default_nan_high
The pattern for a default generated quadruple-precision NaN.
|
static long |
float128_default_nan_low |
static float |
float32_default_nan
The pattern for a default generated single-precision NaN.
|
static double |
float64_default_nan
The pattern for a default generated double-precision NaN.
|
static short |
floatx80_default_nan_high
The pattern for a default generated extended double-precision NaN.
|
static long |
floatx80_default_nan_low |
static byte |
floatx80_rounding_precision
Software IEC/IEEE extended double-precision rounding precision.
|
static int |
LONG_BYTES |
Constructor and Description |
---|
JSoftFloat() |
Modifier and Type | Method and Description |
---|---|
static Float128 |
addFloat128Sigs(Float128 a,
Float128 b,
boolean zSign)
Returns the result of adding the absolute values of the quadruple-precision floating-point
values `a' and `b'.
|
static double |
addFloat64Sigs(double a,
double b,
boolean zSign)
Returns the result of adding the absolute values of the double-precision floating-point values
`a' and `b'.
|
static FloatX80 |
addFloatx80Sigs(FloatX80 a,
FloatX80 b,
boolean zSign)
Returns the result of adding the absolute values of the extended double-precision
floating-point values `a' and `b'.
|
static float |
commonNaNToFloat32(CommonNaNT a)
Returns the result of converting the canonical NaN `a' to the single-precision floating-point
format.
|
static double |
commonNaNToFloat64(CommonNaNT a)
Returns the result of converting the canonical NaN `a' to the double-precision floating-point
format.
|
static FloatX80 |
commonNaNToFloatx80(CommonNaNT a)
Returns the result of converting the canonical NaN `a' to the extended double-precision
floating-point format.
|
static JSoftFloatUtils.FloatRoundingMode |
convertFloatRoundingMode(int roundingMode) |
static int |
extractFloat128Exp(Float128 a)
Returns the exponent bits of the quadruple-precision floating-point value `a'.
|
static long |
extractFloat128Frac0(Float128 a)
Returns the most-significant 48 fraction bits of the quadruple-precision floating-point value
`a'.
|
static long |
extractFloat128Frac1(Float128 a)
Returns the least-significant 64 fraction bits of the quadruple-precision floating-point value
`a'.
|
static boolean |
extractFloat128Sign(Float128 a)
Returns the sign bit of the quadruple-precision floating-point value `a'.
|
static short |
extractFloat32Exp(float a)
Returns the exponent bits of the single-precision floating-point value `a'.
|
static int |
extractFloat32Frac(float a)
Returns the fraction bits of the single-precision floating-point value `a'.
|
static boolean |
extractFloat32Sign(float a)
Returns the sign bit of the single-precision floating-point value `a'.
|
static short |
extractFloat64Exp(double a)
Returns the exponent bits of the double-precision floating-point value `a'.
|
static long |
extractFloat64Frac(double a)
Returns the fraction bits of the double-precision floating-point value `a'.
|
static boolean |
extractFloat64Sign(double a)
Returns the sign bit of the double-precision floating-point value `a'.
|
static int |
extractFloatx80Exp(FloatX80 a)
Returns the exponent bits of the extended double-precision floating-point value `a'.
|
static long |
extractFloatx80Frac(FloatX80 a)
Returns the fraction bits of the extended double-precision floating-point value `a'.
|
static boolean |
extractFloatx80Sign(FloatX80 a)
Returns the sign bit of the extended double-precision floating-point value `a'.
|
static Float128 |
float128_add(Float128 a,
Float128 b)
Returns the result of adding the quadruple-precision floating-point values `a' and `b'.
|
static Float128 |
float128_div(Float128 a,
Float128 b)
Returns the result of dividing the quadruple-precision floating-point value `a' by the
corresponding value `b'.
|
static boolean |
float128_eq_signaling(Float128 a,
Float128 b)
Returns 1 if the quadruple-precision floating-point value `a' is equal to the corresponding
value `b', and 0 otherwise.
|
static boolean |
float128_eq(Float128 a,
Float128 b)
Returns 1 if the quadruple-precision floating-point value `a' is equal to the corresponding
value `b', and 0 otherwise.
|
static boolean |
float128_le_quiet(Float128 a,
Float128 b)
Returns 1 if the quadruple-precision floating-point value `a' is less than or equal to the
corresponding value `b', and 0 otherwise.
|
static boolean |
float128_le(Float128 a,
Float128 b)
Returns 1 if the quadruple-precision floating-point value `a' is less than or equal to the
corresponding value `b', and 0 otherwise.
|
static boolean |
float128_lt_quiet(Float128 a,
Float128 b)
Returns 1 if the quadruple-precision floating-point value `a' is less than the corresponding
value `b', and 0 otherwise.
|
static boolean |
float128_lt(Float128 a,
Float128 b)
Returns 1 if the quadruple-precision floating-point value `a' is less than the corresponding
value `b', and 0 otherwise.
|
static Float128 |
float128_mul(Float128 a,
Float128 b)
Returns the result of multiplying the quadruple-precision floating-point values `a' and `b'.
|
static Float128 |
float128_rem(Float128 a,
Float128 b)
Returns the remainder of the quadruple-precision floating-point value `a' with respect to the
corresponding value `b'.
|
static Float128 |
float128_round_to_int(Float128 a)
Rounds the quadruple-precision floating-point value `a' to an integer, and returns the result
as a quadruple-precision floating-point value.
|
static Float128 |
float128_sqrt(Float128 a)
Returns the square root of the quadruple-precision floating-point value `a'.
|
static Float128 |
float128_sub(Float128 a,
Float128 b)
Returns the result of subtracting the quadruple-precision floating-point values `a' and `b'.
|
static float |
float128_to_float32(Float128 a)
Returns the result of converting the quadruple-precision floating-point value `a' to the
single-precision floating-point format.
|
static double |
float128_to_float64(Float128 a)
Returns the result of converting the quadruple-precision floating-point value `a' to the
double-precision floating-point format.
|
static FloatX80 |
float128_to_floatx80(Float128 a)
Returns the result of converting the quadruple-precision floating-point value `a' to the
extended double-precision floating-point format.
|
static int |
float128_to_int32_round_to_zero(Float128 a)
Returns the result of converting the quadruple-precision floating-point value `a' to the 32-bit
two's complement integer format.
|
static int |
float128_to_int32(Float128 a)
Returns the result of converting the quadruple-precision floating-point value `a' to the 32-bit
two's complement integer format.
|
static long |
float128_to_int64_round_to_zero(Float128 a)
Returns the result of converting the quadruple-precision floating-point value `a' to the 64-bit
two's complement integer format.
|
static long |
float128_to_int64(Float128 a)
Returns the result of converting the quadruple-precision floating-point value `a' to the 64-bit
two's complement integer format.
|
static CommonNaNT |
float128ToCommonNaN(Float128 a)
Returns the result of converting the quadruple-precision floating-point NaN `a' to the
canonical NaN format.
|
static float |
float32_add(float a,
float b)
Returns the result of adding the single-precision floating-point values `a' and `b'.
|
static float |
float32_div(float a,
float b)
Returns the result of dividing the single-precision floating-point value `a' by the
corresponding value `b'.
|
static boolean |
float32_eq_signaling(float a,
float b)
Returns 1 if the single-precision floating-point value `a' is equal to the corresponding value
`b', and 0 otherwise.
|
static boolean |
float32_eq(float a,
float b)
Returns 1 if the single-precision floating-point value `a' is equal to the corresponding value
`b', and 0 otherwise.
|
static boolean |
float32_is_nan(float a)
Returns 1 if the single-precision floating-point value `a' is a NaN; otherwise returns 0.
|
static boolean |
float32_is_signaling_nan(float a)
Returns 1 if the single-precision floating-point value `a' is a signaling NaN; otherwise
returns 0.
|
static boolean |
float32_le_quiet(float a,
float b)
Returns 1 if the single-precision floating-point value `a' is less than or equal to the
corresponding value `b', and 0 otherwise.
|
static boolean |
float32_le(float a,
float b)
Returns 1 if the single-precision floating-point value `a' is less than or equal to the
corresponding value `b', and 0 otherwise.
|
static boolean |
float32_lt_quiet(float a,
float b)
Returns 1 if the single-precision floating-point value `a' is less than the corresponding value
`b', and 0 otherwise.
|
static boolean |
float32_lt(float a,
float b)
Returns 1 if the single-precision floating-point value `a' is less than the corresponding
value `b', and 0 otherwise.
|
static float |
float32_mul(float a,
float b)
Returns the result of multiplying the single-precision floating-point values `a' and `b'.
|
static float |
float32_rem(float a,
float b)
Returns the remainder of the single-precision floating-point value `a' with respect to the
corresponding value `b'.
|
static float |
float32_round_to_int(float a) |
static float |
float32_round_to_int(float a,
JSoftFloatUtils.FloatRoundingMode rounding_mode,
boolean exact)
Rounds the single-precision floating-point value `a' to an integer, and returns the result as a
single-precision floating-point value.
|
static float |
float32_round_to_int2(float a)
Rounds the single-precision floating-point value `a' to an integer, and returns the result as a
single-precision floating-point value.
|
static float |
float32_round(float a,
int rounding_mode,
int exact) |
static float |
float32_sqrt(float a)
Returns the square root of the single-precision floating-point value `a'.
|
static float |
float32_sub(float a,
float b)
Returns the result of subtracting the single-precision floating-point values `a' and `b'.
|
static Float128 |
float32_to_float128(float a)
Returns the result of converting the single-precision floating-point value `a' to the
quadruple-precision floating-point format.
|
static double |
float32_to_float64(float a)
Returns the result of converting the single-precision floating-point value `a' to the
double-precision floating-point format.
|
static FloatX80 |
float32_to_floatx80(float a)
Returns the result of converting the single-precision floating-point value `a' to the extended
double-precision floating-point format.
|
static int |
float32_to_int32_round_to_zero(float a)
Returns the result of converting the single-precision floating-point value `a' to the 32-bit
two's complement integer format.
|
static int |
float32_to_int32(float a)
Returns the result of converting the single-precision floating-point value `a' to the 32-bit
two's complement integer format.
|
static long |
float32_to_int64_round_to_zero(float a)
Returns the result of converting the single-precision floating-point value `a' to the 64-bit
two's complement integer format.
|
static long |
float32_to_int64(float a)
Returns the result of converting the single-precision floating-point value `a' to the 64-bit
two's complement integer format.
|
static double |
float64_add(double a,
double b)
Returns the result of adding the double-precision floating-point values `a' and `b'.
|
static double |
float64_div(double a,
double b)
Returns the result of dividing the double-precision floating-point value `a' by the
corresponding value `b'.
|
static boolean |
float64_eq_signaling(double a,
double b)
Returns 1 if the double-precision floating-point value `a' is equal to the corresponding value
`b', and 0 otherwise.
|
static boolean |
float64_eq(double a,
double b)
Returns 1 if the double-precision floating-point value `a' is equal to the corresponding value
`b', and 0 otherwise.
|
static boolean |
float64_is_nan(double a)
Returns 1 if the double-precision floating-point value `a' is a NaN; otherwise returns 0.
|
static boolean |
float64_is_signaling_nan(double a)
Returns 1 if the double-precision floating-point value `a' is a signaling NaN; otherwise
returns 0.
|
static boolean |
float64_le_quiet(double a,
double b)
Returns 1 if the double-precision floating-point value `a' is less than or equal to the
corresponding value `b', and 0 otherwise.
|
static boolean |
float64_le(double a,
double b)
Returns 1 if the double-precision floating-point value `a' is less than or equal to the
corresponding value `b', and 0 otherwise.
|
static boolean |
float64_lt_quiet(double a,
double b)
Returns 1 if the double-precision floating-point value `a' is less than the corresponding value
`b', and 0 otherwise.
|
static boolean |
float64_lt(double a,
double b)
Returns 1 if the double-precision floating-point value `a' is less than the corresponding value
`b', and 0 otherwise.
|
static double |
float64_mul(double a,
double b)
Returns the result of multiplying the double-precision floating-point values `a' and `b'.
|
static double |
float64_rem(double a,
double b)
Returns the remainder of the double-precision floating-point value `a' with respect to the
corresponding value `b'.
|
static double |
float64_round_to_int(double a) |
static double |
float64_round_to_int(double a,
JSoftFloatUtils.FloatRoundingMode rounding_mode,
boolean exact)
Rounds the double-precision floating-point value `a' to an integer, and returns the result as a
double-precision floating-point value.
|
static double |
float64_round_to_int2(double a)
Rounds the double-precision floating-point value `a' to an integer, and returns the result as a
double-precision floating-point value.
|
static double |
float64_round(double a,
int rounding_mode,
int exact) |
static double |
float64_sqrt(double a)
Returns the square root of the double-precision floating-point value `a'.
|
static double |
float64_sub(double a,
double b)
Returns the result of subtracting the double-precision floating-point values `a' and `b'.
|
static Float128 |
float64_to_float128(double a)
Returns the result of converting the double-precision floating-point value `a' to the
quadruple-precision floating-point format.
|
static float |
float64_to_float32(double a)
Returns the result of converting the double-precision floating-point value `a' to the
single-precision floating-point format.
|
static FloatX80 |
float64_to_floatx80(double a)
Returns the result of converting the double-precision floating-point value `a' to the extended
double-precision floating-point format.
|
static int |
float64_to_int32_round_to_zero(double a)
Returns the result of converting the double-precision floating-point value `a' to the 32-bit
two's complement integer format.
|
static int |
float64_to_int32(double a)
Returns the result of converting the double-precision floating-point value `a' to the 32-bit
two's complement integer format.
|
static long |
float64_to_int64_round_to_zero(double a)
Returns the result of converting the double-precision floating-point value `a' to the 64-bit
two's complement integer format.
|
static long |
float64_to_int64(double a)
Returns the result of converting the double-precision floating-point value `a' to the 64-bit
two's complement integer format.
|
static CommonNaNT |
float64ToCommonNaN(double a)
Returns the result of converting the double-precision floating-point NaN `a' to the canonical
NaN format.
|
static FloatX80 |
floatx80_add(FloatX80 a,
FloatX80 b)
Returns the result of adding the extended double-precision floating-point values `a' and `b'.
|
static FloatX80 |
floatx80_div(FloatX80 a,
FloatX80 b)
Returns the result of dividing the extended double-precision floating-point value `a' by the
corresponding value `b'.
|
static boolean |
floatx80_eq_signaling(FloatX80 a,
FloatX80 b)
Returns 1 if the extended double-precision floating-point value `a' is equal to the
corresponding value `b', and 0 otherwise.
|
static boolean |
floatx80_eq(FloatX80 a,
FloatX80 b)
Returns 1 if the extended double-precision floating-point value `a' is equal to the
corresponding value `b', and 0 otherwise.
|
static boolean |
floatx80_is_nan(FloatX80 a)
Returns 1 if the extended double-precision floating-point value `a' is a NaN; otherwise returns
0.
|
static boolean |
floatx80_is_signaling_nan(FloatX80 a)
Returns 1 if the extended double-precision floating-point value `a' is a signaling NaN;
otherwise returns 0.
|
static boolean |
floatx80_le_quiet(FloatX80 a,
FloatX80 b)
Returns 1 if the extended double-precision floating-point value `a' is less than or equal to
the corresponding value `b', and 0 otherwise.
|
static boolean |
floatx80_le(FloatX80 a,
FloatX80 b)
Returns 1 if the extended double-precision floating-point value `a' is less than or equal to
the corresponding value `b', and 0 otherwise.
|
static boolean |
floatx80_lt_quiet(FloatX80 a,
FloatX80 b)
Returns 1 if the extended double-precision floating-point value `a' is less than the
corresponding value `b', and 0 otherwise.
|
static boolean |
floatx80_lt(FloatX80 a,
FloatX80 b)
Returns 1 if the extended double-precision floating-point value `a' is less than the
corresponding value `b', and 0 otherwise.
|
static FloatX80 |
floatx80_mul(FloatX80 a,
FloatX80 b)
Returns the result of multiplying the extended double-precision floating-point values `a' and
`b'.
|
static FloatX80 |
floatx80_rem(FloatX80 a,
FloatX80 b)
Returns the remainder of the extended double-precision floating-point value `a' with respect to
the corresponding value `b'.
|
static FloatX80 |
floatx80_round_to_int(FloatX80 a)
Rounds the extended double-precision floating-point value `a' to an integer, and returns the
result as an extended double-precision floating-point value.
|
static FloatX80 |
floatx80_sqrt(FloatX80 a)
Returns the square root of the extended double-precision floating-point value `a'.
|
static FloatX80 |
floatx80_sub(FloatX80 a,
FloatX80 b)
Returns the result of subtracting the extended double-precision floating-point values `a' and
`b'.
|
static Float128 |
floatx80_to_float128(FloatX80 a)
Returns the result of converting the extended double-precision floating-point value `a' to the
quadruple-precision floating-point format.
|
static float |
floatx80_to_float32(FloatX80 a)
Returns the result of converting the extended double-precision floating-point value `a' to the
single-precision floating-point format.
|
static double |
floatx80_to_float64(FloatX80 a)
Returns the result of converting the extended double-precision floating-point value `a' to the
double-precision floating-point format.
|
static int |
floatx80_to_int32_round_to_zero(FloatX80 a)
Returns the result of converting the extended double-precision floating-point value `a' to the
32-bit two's complement integer format.
|
static int |
floatx80_to_int32(FloatX80 a)
Returns the result of converting the extended double-precision floating-point value `a' to the
32-bit two's complement integer format.
|
static long |
floatx80_to_int64_round_to_zero(FloatX80 a)
Returns the result of converting the extended double-precision floating-point value `a' to the
64-bit two's complement integer format.
|
static long |
floatx80_to_int64(FloatX80 a)
Returns the result of converting the extended double-precision floating-point value `a' to the
64-bit two's complement integer format.
|
static CommonNaNT |
floatx80ToCommonNaN(FloatX80 a)
Returns the result of converting the extended double-precision floating-point NaN `a' to the
canonical NaN format.
|
static int |
getFloatExceptionFlags()
Get the exception flag.
|
static int |
getFloatRoundingMode() |
static Float128 |
int32_to_float128(int a)
Returns the result of converting the 32-bit two's complement integer `a' to the
quadruple-precision floating-point format.
|
static float |
int32_to_float32(int a)
Returns the result of converting the 32-bit two's complement integer `a' to the
single-precision floating-point format.
|
static double |
int32_to_float64(int a)
Returns the result of converting the 32-bit two's complement integer `a' to the
double-precision floating-point format.
|
static FloatX80 |
int32_to_floatx80(int a)
Returns the result of converting the 32-bit two's complement integer `a' to the extended
double-precision floating-point format.
|
static Float128 |
int64_to_float128(long a)
Returns the result of converting the 64-bit two's complement integer `a' to the
quadruple-precision floating-point format.
|
static float |
int64_to_float32(long a)
Returns the result of converting the 64-bit two's complement integer `a' to the
single-precision floating-point format.
|
static double |
int64_to_float64(long a)
Returns the result of converting the 64-bit two's complement integer `a' to the
double-precision floating-point format.
|
static FloatX80 |
int64_to_floatx80(long a)
Returns the result of converting the 64-bit two's complement integer `a' to the extended
double-precision floating-point format.
|
static java.util.List<java.lang.Number> |
normalizeFloat128Subnormal(long aSig0,
long aSig1,
int zExpPtr,
long zSig0Ptr,
long zSig1Ptr)
Normalizes the subnormal quadruple-precision floating-point value represented by the
denormalized significand formed by the concatenation of `aSig0' and `aSig1'.
|
static java.util.List<java.lang.Number> |
normalizeFloat32Subnormal(int aSig)
Normalizes the subnormal single-precision floating-point value represented by the denormalized
significand `aSig'.
|
static java.util.List<java.lang.Number> |
normalizeFloatx80Subnormal(long aSig)
Normalizes the subnormal extended double-precision floating-point value represented by the
denormalized significand `aSig'.
|
static Float128 |
normalizeRoundAndPackFloat128(boolean zSign,
int zExp,
long zSig0,
long zSig1)
Takes an abstract floating-point value having sign `zSign', exponent `zExp', and significand
formed by the concatenation of `zSig0' and `zSig1', and returns the proper quadruple-precision
floating-point value corresponding to the abstract input.
|
static float |
normalizeRoundAndPackFloat32(boolean zSign,
short zExp,
int zSig)
Takes an abstract floating-point value having sign `zSign', exponent `zExp', and significand
`zSig', and returns the proper single-precision floating-point value corresponding to the
abstract input.
|
static double |
normalizeRoundAndPackFloat64(boolean zSign,
short zExp,
long zSig)
Takes an abstract floating-point value having sign `zSign', exponent `zExp', and significand
`zSig', and returns the proper double-precision floating-point value corresponding to the
abstract input.
|
static Float128 |
packFloat128(boolean zSign,
int zExp,
long zSig0,
long zSig1)
Packs the sign `zSign', the exponent `zExp', and the significand formed by the concatenation of
`zSig0' and `zSig1' into a quadruple-precision floating-point value, returning the result.
|
static float |
packFloat32(boolean zSign,
short zExp,
int zSig)
Packs the sign `zSign', exponent `zExp', and significand `zSig' into a single-precision
floating-point value, returning the result.
|
static double |
packFloat64(boolean zSign,
short zExp,
long zSig)
Packs the sign `zSign', exponent `zExp', and significand `zSig' into a double-precision
floating-point value, returning the result.
|
static FloatX80 |
packFloatx80(boolean zSign,
int zExp,
long zSig)
Packs the sign `zSign', exponent `zExp', and significand `zSig' into an extended
double-precision floating-point value, returning the result.
|
static Float128 |
propagateFloat128NaN(Float128 a,
Float128 b)
Takes two quadruple-precision floating-point values `a' and `b', one of which is a NaN, and
returns the appropriate NaN result.
|
static float |
propagateFloat32NaN(float a,
float b)
Takes two single-precision floating-point values `a' and `b', one of which is a NaN, and
returns the appropriate NaN result.
|
static double |
propagateFloat64NaN(double a,
double b)
Takes two double-precision floating-point values `a' and `b', one of which is a NaN, and
returns the appropriate NaN result.
|
static FloatX80 |
propagateFloatx80NaN(FloatX80 a,
FloatX80 b)
Takes two extended double-precision floating-point values `a' and `b', one of which is a NaN,
and returns the appropriate NaN result.
|
static Float128 |
roundAndPackFloat128(boolean zSign,
int zExp,
long zSig0,
long zSig1,
long zSig2)
Takes an abstract floating-point value having sign `zSign', exponent `zExp', and extended
significand formed by the concatenation of `zSig0', `zSig1', and `zSig2', and returns the
proper quadruple-precision floating-point value corresponding to the abstract input.
|
static int |
roundAndPackInt32(boolean zSign,
long absZ)
Takes a 64-bit fixed-point value `absZ' with binary point between bits 6 and 7, and returns the
properly rounded 32-bit integer corresponding to the input.
|
static long |
roundAndPackInt64(boolean zSign,
long absZ0,
long absZ1)
Takes the 128-bit fixed-point value formed by concatenating `absZ0' and `absZ1', with binary
point between bits 63 and 64 (between the input words), and returns the properly rounded 64-bit
integer corresponding to the input.
|
static void |
setFloatExceptionFlags(int i)
Set the exception flag.
|
static void |
setFloatRoundingMode(int roundingMode) |
static Float128 |
subFloat128Sigs(Float128 a,
Float128 b,
boolean zSign)
Returns the result of subtracting the absolute values of the quadruple-precision
floating-point values `a' and `b'.
|
static double |
subFloat64Sigs(double a,
double b,
boolean zSign)
Returns the result of subtracting the absolute values of the double-precision floating-point
values `a' and `b'.
|
static FloatX80 |
subFloatx80Sigs(FloatX80 a,
FloatX80 b,
boolean zSign)
Returns the result of subtracting the absolute values of the extended double-precision
floating-point values `a' and `b'.
|
public static final int LONG_BYTES
public static final long float128_default_nan_high
public static final long float128_default_nan_low
public static final short floatx80_default_nan_high
public static final long floatx80_default_nan_low
public static byte floatx80_rounding_precision
public static final float float32_default_nan
public static final double float64_default_nan
public static byte float_exception_flags
public static int getFloatRoundingMode()
public static void setFloatRoundingMode(int roundingMode)
public static JSoftFloatUtils.FloatRoundingMode convertFloatRoundingMode(int roundingMode)
public static void setFloatExceptionFlags(int i)
i
-
public static int getFloatExceptionFlags()
public static int roundAndPackInt32(boolean zSign, long absZ)
zSign
- if true
then the input is negatedabsZ
- fixed-point value (bits64)public static long roundAndPackInt64(boolean zSign, long absZ0, long absZ1)
zSign
- if true
then the input is negatedabsZ0
- 1st part of the 128-bit fixed-point value (bits64)absZ1
- 2nd part of the 128-bit fixed-point value (bits64)public static int extractFloat32Frac(float a)
a
- the single-precision floating-point value (float32)public static short extractFloat32Exp(float a)
a
- the single-precision floating-point value (float32)public static boolean extractFloat32Sign(float a)
a
- the single-precision floating-point value (float32)public static java.util.List<java.lang.Number> normalizeFloat32Subnormal(int aSig)
aSig
- the subnormal single-precision floating-point value (bits32)public static float packFloat32(boolean zSign, short zExp, int zSig)
zSign
- the sign.zExp
- the exponent.zSig
- the significand.public static float normalizeRoundAndPackFloat32(boolean zSign, short zExp, int zSig)
zSign
- sign of abstract floating-point value (flag)zExp
- exponent of abstract floating-point value (int16)zSig
- significand of abstract floating-point value (bits32)public static long extractFloat64Frac(double a)
a
- the double-precision floating-point value.public static short extractFloat64Exp(double a)
a
- float64public static boolean extractFloat64Sign(double a)
a
- the double-precision floating-point value.public static double packFloat64(boolean zSign, short zExp, long zSig)
zSign
- sign of a double-precision floating-point value.zExp
- exponent of a double-precision floating-point value.zSig
- significand of a double-precision floating-point value.public static double normalizeRoundAndPackFloat64(boolean zSign, short zExp, long zSig)
zSign
- flagzExp
- int16zSig
- bits64public static long extractFloatx80Frac(FloatX80 a)
a
- floatx80public static int extractFloatx80Exp(FloatX80 a)
a
- floatx80public static boolean extractFloatx80Sign(FloatX80 a)
a
- floatx80public static java.util.List<java.lang.Number> normalizeFloatx80Subnormal(long aSig)
aSig
- bits64zExpPtr
- int32zSigPtr
- bits64public static FloatX80 packFloatx80(boolean zSign, int zExp, long zSig)
zSign
- flagzExp
- int32zSig
- bits64public static long extractFloat128Frac1(Float128 a)
a
- float128public static long extractFloat128Frac0(Float128 a)
a
- float128public static int extractFloat128Exp(Float128 a)
a
- float128public static boolean extractFloat128Sign(Float128 a)
a
- float128public static java.util.List<java.lang.Number> normalizeFloat128Subnormal(long aSig0, long aSig1, int zExpPtr, long zSig0Ptr, long zSig1Ptr)
aSig0
- float128aSig1
- float128zExpPtr
- int32zSig0Ptr
- float128zSig1Ptr
- float128public static Float128 packFloat128(boolean zSign, int zExp, long zSig0, long zSig1)
zSign
- flagzExp
- int32zSig0
- bits64zSig1
- bits64public static Float128 roundAndPackFloat128(boolean zSign, int zExp, long zSig0, long zSig1, long zSig2)
zSign
- flagzExp
- int32zSig0
- bits64zSig1
- bits64zSig2
- bits64public static Float128 normalizeRoundAndPackFloat128(boolean zSign, int zExp, long zSig0, long zSig1)
zSign
- flagzExp
- int32zSig0
- bits64zSig1
- bits64public static float int32_to_float32(int a)
a
- the 32-bit integer.public static double int32_to_float64(int a)
a
- the 32-bit integer.public static FloatX80 int32_to_floatx80(int a)
a
- the 32-bit integer.public static Float128 int32_to_float128(int a)
a
- the 32-bit integer.public static float int64_to_float32(long a)
a
- the 64-bit integer.public static double int64_to_float64(long a)
a
- the 64-bit integer.public static FloatX80 int64_to_floatx80(long a)
a
- the 64-bit integer.public static Float128 int64_to_float128(long a)
a
- the 64-bit integer.public static int float32_to_int32(float a)
a
- the single-precision floating-point value.public static int float32_to_int32_round_to_zero(float a)
a
- the single-precision floating-point value.public static long float32_to_int64(float a)
a
- the single-precision floating-point value.public static long float32_to_int64_round_to_zero(float a)
a
- the single-precision floating-point value.public static boolean float32_is_signaling_nan(float a)
a
- the single-precision floating-point value.public static double commonNaNToFloat64(CommonNaNT a)
a
- commonNaNTpublic static double float32_to_float64(float a)
a
- the single-precision floating-point value.public static FloatX80 float32_to_floatx80(float a)
a
- the single-precision floating-point value.public static FloatX80 commonNaNToFloatx80(CommonNaNT a)
a
- commonNaNTpublic static Float128 float32_to_float128(float a)
a
- the single-precision floating-point value.public static float float32_round_to_int2(float a)
a
- the single-precision floating-point value.public static float propagateFloat32NaN(float a, float b)
a
- the single-precision floating-point value.b
- the single-precision floating-point value.public static boolean float32_is_nan(float a)
a
- the single-precision floating-point value.public static float float32_add(float a, float b)
a
- the single-precision floating-point value.b
- the single-precision floating-point value.public static float float32_sub(float a, float b)
a
- the single-precision floating-point value.b
- the single-precision floating-point value.public static float float32_mul(float a, float b)
a
- the single-precision floating-point value.b
- the single-precision floating-point value.public static float float32_div(float a, float b)
a
- the single-precision floating-point value.b
- the single-precision floating-point value.public static float float32_rem(float a, float b)
a
- the single-precision floating-point value.b
- the single-precision floating-point value.public static float float32_sqrt(float a)
a
- the single-precision floating-point value.public static boolean float32_eq(float a, float b)
a
- the single-precision floating-point value.b
- the single-precision floating-point value.public static boolean float32_le(float a, float b)
a
- the single-precision floating-point value.b
- the single-precision floating-point value.public static boolean float32_lt(float a, float b)
a
- the single-precision floating-point value.b
- the single-precision floating-point value.public static boolean float32_eq_signaling(float a, float b)
a
- the single-precision floating-point value.b
- the single-precision floating-point value.public static boolean float32_le_quiet(float a, float b)
a
- the single-precision floating-point value.b
- the single-precision floating-point value.public static boolean float32_lt_quiet(float a, float b)
a
- the single-precision floating-point value.b
- the single-precision floating-point value.public static int float64_to_int32(double a)
a
- the double-precision floating-point value.public static int float64_to_int32_round_to_zero(double a)
a
- the double-precision floating-point value.public static long float64_to_int64(double a)
a
- the double-precision floating-point value.public static long float64_to_int64_round_to_zero(double a)
a
- the double-precision floating-point value.public static float float64_to_float32(double a)
a
- the double-precision floating-point value.public static CommonNaNT float64ToCommonNaN(double a)
a
- the double-precision floating-point value.public static boolean float64_is_signaling_nan(double a)
a
- the double-precision floating-point value.public static float commonNaNToFloat32(CommonNaNT a)
a
- commonNaNTpublic static FloatX80 float64_to_floatx80(double a)
a
- the double-precision floating-point value.public static Float128 float64_to_float128(double a)
a
- the double-precision floating-point value.public static double float64_round_to_int2(double a)
a
- the double-precision floating-point value.public static double propagateFloat64NaN(double a, double b)
a
- the double-precision floating-point value.b
- the double-precision floating-point value.public static boolean float64_is_nan(double a)
a
- float64public static double addFloat64Sigs(double a, double b, boolean zSign)
a
- the double-precision floating-point value.b
- the double-precision floating-point value.zSign
- flagpublic static double subFloat64Sigs(double a, double b, boolean zSign)
a
- the double-precision floating-point value.b
- the double-precision floating-point value.zSign
- flagpublic static double float64_add(double a, double b)
a
- the double-precision floating-point value.b
- the double-precision floating-point value.public static double float64_sub(double a, double b)
a
- the double-precision floating-point value.b
- the double-precision floating-point value.public static double float64_mul(double a, double b)
a
- the double-precision floating-point value.b
- the double-precision floating-point value.public static double float64_div(double a, double b)
a
- the double-precision floating-point value.b
- the double-precision floating-point value.public static double float64_rem(double a, double b)
a
- the double-precision floating-point value.b
- the double-precision floating-point value.public static double float64_sqrt(double a)
a
- the double-precision floating-point value.public static boolean float64_eq(double a, double b)
a
- the double-precision floating-point value.b
- the double-precision floating-point value.public static boolean float64_le(double a, double b)
a
- the double-precision floating-point value.b
- the double-precision floating-point value.public static boolean float64_lt(double a, double b)
a
- the double-precision floating-point value.b
- the double-precision floating-point value.public static boolean float64_eq_signaling(double a, double b)
a
- the double-precision floating-point value.b
- the double-precision floating-point value.public static boolean float64_le_quiet(double a, double b)
a
- the double-precision floating-point value.b
- the double-precision floating-point value.public static boolean float64_lt_quiet(double a, double b)
a
- the double-precision floating-point value.b
- the double-precision floating-point value.public static int floatx80_to_int32(FloatX80 a)
a
- the extended double-precision floating-point value.public static int floatx80_to_int32_round_to_zero(FloatX80 a)
a
- the extended double-precision floating-point value.public static long floatx80_to_int64(FloatX80 a)
a
- the extended double-precision floating-point value.public static long floatx80_to_int64_round_to_zero(FloatX80 a)
a
- the extended double-precision floating-point value.public static float floatx80_to_float32(FloatX80 a)
a
- the extended double-precision floating-point value.public static CommonNaNT floatx80ToCommonNaN(FloatX80 a)
a
- the extended double-precision floating-point value.public static boolean floatx80_is_signaling_nan(FloatX80 a)
a
- the extended double-precision floating-point value.public static double floatx80_to_float64(FloatX80 a)
a
- the extended double-precision floating-point value.public static Float128 floatx80_to_float128(FloatX80 a)
a
- the extended double-precision floating-point value.public static FloatX80 floatx80_round_to_int(FloatX80 a)
a
- the extended double-precision floating-point value.public static FloatX80 propagateFloatx80NaN(FloatX80 a, FloatX80 b)
a
- the extended double-precision floating-point value.b
- the extended double-precision floating-point value.public static boolean floatx80_is_nan(FloatX80 a)
a
- the extended double-precision floating-point value.public static FloatX80 addFloatx80Sigs(FloatX80 a, FloatX80 b, boolean zSign)
a
- the extended double-precision floating-point value.b
- the extended double-precision floating-point value.zSign
- flagpublic static FloatX80 subFloatx80Sigs(FloatX80 a, FloatX80 b, boolean zSign)
a
- the extended double-precision floating-point value.b
- the extended double-precision floating-point value.zSign
- flagpublic static FloatX80 floatx80_add(FloatX80 a, FloatX80 b)
a
- the extended double-precision floating-point value.b
- the extended double-precision floating-point value.public static FloatX80 floatx80_sub(FloatX80 a, FloatX80 b)
a
- the extended double-precision floating-point value.b
- the extended double-precision floating-point value.public static FloatX80 floatx80_mul(FloatX80 a, FloatX80 b)
a
- the extended double-precision floating-point value.b
- the extended double-precision floating-point value.public static FloatX80 floatx80_div(FloatX80 a, FloatX80 b)
a
- the extended double-precision floating-point value.b
- the extended double-precision floating-point value.public static FloatX80 floatx80_rem(FloatX80 a, FloatX80 b)
a
- the extended double-precision floating-point value.b
- the extended double-precision floating-point value.public static FloatX80 floatx80_sqrt(FloatX80 a)
a
- the extended double-precision floating-point value.public static boolean floatx80_eq(FloatX80 a, FloatX80 b)
a
- the extended double-precision floating-point value.b
- the extended double-precision floating-point value.true
if a == b, false
otherwise.public static boolean floatx80_le(FloatX80 a, FloatX80 b)
a
- the extended double-precision floating-point value.b
- the extended double-precision floating-point value.true
if a <= b, false
if a > b.public static boolean floatx80_lt(FloatX80 a, FloatX80 b)
a
- the extended double-precision floating-point value.b
- the extended double-precision floating-point value.true
if a < b, false
if a >= b.public static boolean floatx80_eq_signaling(FloatX80 a, FloatX80 b)
a
- the extended double-precision floating-point value.b
- the extended double-precision floating-point value.true
if a == b, false
otherwise.public static boolean floatx80_le_quiet(FloatX80 a, FloatX80 b)
a
- the extended double-precision floating-point value.b
- the extended double-precision floating-point value.true
if a <= b, false
if a > b.public static boolean floatx80_lt_quiet(FloatX80 a, FloatX80 b)
a
- the extended double-precision floating-point value.b
- the extended double-precision floating-point value.true
if a < b, false
if a >= b.public static int float128_to_int32(Float128 a)
a
- the quadruple-precision floating-point value.public static int float128_to_int32_round_to_zero(Float128 a)
a
- the quadruple-precision floating-point value.public static long float128_to_int64(Float128 a)
a
- the quadruple-precision floating-point value.public static long float128_to_int64_round_to_zero(Float128 a)
a
- the quadruple-precision floating-point value.public static float float128_to_float32(Float128 a)
a
- the quadruple-precision floating-point value.public static CommonNaNT float128ToCommonNaN(Float128 a)
a
- the quadruple-precision floating-point value.public static double float128_to_float64(Float128 a)
a
- the quadruple-precision floating-point value.public static FloatX80 float128_to_floatx80(Float128 a)
a
- the quadruple-precision floating-point value.public static Float128 float128_round_to_int(Float128 a)
a
- the quadruple-precision floating-point value.public static Float128 propagateFloat128NaN(Float128 a, Float128 b)
a
- the quadruple-precision floating-point value.b
- the quadruple-precision floating-point value.public static Float128 addFloat128Sigs(Float128 a, Float128 b, boolean zSign)
a
- the quadruple-precision floating-point value.b
- the quadruple-precision floating-point value.zSign
- if zSign == true
the sum is negated before being returned;
if zSign == false
the sum is returned without negation.public static Float128 subFloat128Sigs(Float128 a, Float128 b, boolean zSign)
a
- the quadruple-precision floating-point value.b
- the quadruple-precision floating-point value.zSign
- if zSign == true
the difference is negated before being returned;
if zSign == false
the difference is returned without negation.public static Float128 float128_add(Float128 a, Float128 b)
a
- the quadruple-precision floating-point value.b
- the quadruple-precision floating-point value.public static Float128 float128_sub(Float128 a, Float128 b)
a
- the quadruple-precision floating-point value.b
- the quadruple-precision floating-point value.public static Float128 float128_mul(Float128 a, Float128 b)
a
- the quadruple-precision floating-point value.b
- the quadruple-precision floating-point value.public static Float128 float128_div(Float128 a, Float128 b)
a
- the quadruple-precision floating-point value (dividend).b
- the quadruple-precision floating-point value (divisor).public static Float128 float128_rem(Float128 a, Float128 b)
a
- the quadruple-precision floating-point value.b
- the quadruple-precision floating-point value.public static Float128 float128_sqrt(Float128 a)
a
- the quadruple-precision floating-point value.public static boolean float128_eq(Float128 a, Float128 b)
a
- the quadruple-precision floating-point value.b
- the quadruple-precision floating-point value.true
if a == b, false
otherwise.public static boolean float128_le(Float128 a, Float128 b)
a
- the quadruple-precision floating-point value.b
- the quadruple-precision floating-point value.true
if a <= b, false
if a > b.public static boolean float128_lt(Float128 a, Float128 b)
a
- the quadruple-precision floating-point value.b
- the quadruple-precision floating-point value.true
if a < b, false
if a >= b.public static boolean float128_eq_signaling(Float128 a, Float128 b)
a
- the quadruple-precision floating-point value.b
- the quadruple-precision floating-point value.true
if a == b, false
otherwise.public static boolean float128_le_quiet(Float128 a, Float128 b)
a
- the quadruple-precision floating-point value.b
- the quadruple-precision floating-point value.true
if a <= b, false
if a > b.public static boolean float128_lt_quiet(Float128 a, Float128 b)
a
- the quadruple-precision floating-point value.b
- the quadruple-precision floating-point value.true
if a < b, false
if a >= b.public static double float64_round_to_int(double a)
public static double float64_round(double a, int rounding_mode, int exact)
public static double float64_round_to_int(double a, JSoftFloatUtils.FloatRoundingMode rounding_mode, boolean exact)
a
- the double-precision floating-point value.rounding_mode
- exact
- public static float float32_round_to_int(float a)
public static float float32_round(float a, int rounding_mode, int exact)
public static float float32_round_to_int(float a, JSoftFloatUtils.FloatRoundingMode rounding_mode, boolean exact)
a
- the single-precision floating-point value.rounding_mode
- exact
-