Trait malachite_base::num::basic::floats::PrimitiveFloat

source ·
pub trait PrimitiveFloat:
    'static
    + Abs<Output = Self>
    + AbsAssign
    + Add<Output = Self>
    + AddAssign<Self>
    + AddMul<Output = Self>
    + AddMulAssign<Self, Self>
    + Ceiling<Output = Self>
    + CeilingAssign
    + CeilingLogBase2<Output = i64>
    + CeilingLogBasePowerOf2<u64, Output = i64>
    + CheckedLogBase2<Output = i64>
    + CheckedLogBasePowerOf2<u64, Output = i64>
    + ConvertibleFrom<u8>
    + ConvertibleFrom<u16>
    + ConvertibleFrom<u32>
    + ConvertibleFrom<u64>
    + ConvertibleFrom<u128>
    + ConvertibleFrom<usize>
    + ConvertibleFrom<i8>
    + ConvertibleFrom<i16>
    + ConvertibleFrom<i32>
    + ConvertibleFrom<i64>
    + ConvertibleFrom<i128>
    + ConvertibleFrom<isize>
    + Copy
    + Debug
    + Default
    + Display
    + Div<Output = Self>
    + DivAssign
    + Floor<Output = Self>
    + FloorAssign
    + FloorLogBase2<Output = i64>
    + FloorLogBasePowerOf2<u64, Output = i64>
    + FmtRyuString
    + From<f32>
    + FromStr
    + Infinity
    + IntegerMantissaAndExponent<u64, i64>
    + Into<f64>
    + IsInteger
    + IsPowerOf2
    + Ln
    + LowerExp
    + Min
    + Max
    + Mul<Output = Self>
    + MulAssign<Self>
    + Named
    + NaN
    + NegativeInfinity
    + NegativeZero
    + Neg<Output = Self>
    + NegAssign
    + NegativeOne
    + NextPowerOf2<Output = Self>
    + NextPowerOf2Assign
    + One
    + PartialEq<Self>
    + PartialOrd<Self>
    + PartialOrdAbs<Self>
    + Pow<i64, Output = Self>
    + Pow<Self, Output = Self>
    + PowAssign<i64>
    + PowAssign<Self>
    + PowerOf2<i64>
    + PrimeConstant
    + Product
    + RawMantissaAndExponent<u64, u64>
    + Reciprocal<Output = Self>
    + ReciprocalAssign
    + RefUnwindSafe
    + Rem<Output = Self>
    + RemAssign<Self>
    + RoundingFrom<u8>
    + RoundingFrom<u16>
    + RoundingFrom<u32>
    + RoundingFrom<u64>
    + RoundingFrom<u128>
    + RoundingFrom<usize>
    + RoundingFrom<i8>
    + RoundingFrom<i16>
    + RoundingFrom<i32>
    + RoundingFrom<i64>
    + RoundingFrom<i128>
    + RoundingFrom<isize>
    + RoundingInto<u8>
    + RoundingInto<u16>
    + RoundingInto<u32>
    + RoundingInto<u64>
    + RoundingInto<u128>
    + RoundingInto<usize>
    + RoundingInto<i8>
    + RoundingInto<i16>
    + RoundingInto<i32>
    + RoundingInto<i64>
    + RoundingInto<i128>
    + RoundingInto<isize>
    + SciMantissaAndExponent<Self, i64>
    + Sign
    + Sized
    + Sqrt<Output = Self>
    + SqrtAssign
    + Square<Output = Self>
    + SquareAssign
    + Sub<Output = Self>
    + SubAssign<Self>
    + SubMul<Output = Self>
    + SubMulAssign<Self, Self>
    + Sum<Self>
    + ThueMorseConstant
    + Two
    + UpperExp
    + Zero {
    const WIDTH: u64;
    const MANTISSA_WIDTH: u64;
    const MIN_POSITIVE_SUBNORMAL: Self;
    const MAX_SUBNORMAL: Self;
    const MIN_POSITIVE_NORMAL: Self;
    const MAX_FINITE: Self;
    const SMALLEST_UNREPRESENTABLE_UINT: u64;
    const LARGEST_ORDERED_REPRESENTATION: u64;
    const EXPONENT_WIDTH: u64 = _;
    const MIN_NORMAL_EXPONENT: i64 = _;
    const MIN_EXPONENT: i64 = _;
    const MAX_EXPONENT: i64 = _;
    // Required methods
    fn is_nan(self) -> bool;
    fn is_infinite(self) -> bool;
    fn is_finite(self) -> bool;
    fn is_normal(self) -> bool;
    fn is_sign_positive(self) -> bool;
    fn is_sign_negative(self) -> bool;
    fn classify(self) -> FpCategory;
    fn to_bits(self) -> u64;
    fn from_bits(v: u64) -> Self;

    // Provided methods
    fn is_negative_zero(self) -> bool { ... }
    fn abs_negative_zero(self) -> Self { ... }
    fn abs_negative_zero_assign(&mut self) { ... }
    fn next_higher(self) -> Self { ... }
    fn next_lower(self) -> Self { ... }
    fn to_ordered_representation(self) -> u64 { ... }
    fn from_ordered_representation(n: u64) -> Self { ... }
    fn precision(self) -> u64 { ... }
    fn max_precision_for_sci_exponent(exponent: i64) -> u64 { ... }
}
Expand description

This trait defines functions on primitive float types: f32 and f64.

Many of the functions here concern exponents and mantissas. We define three ways to express a float, each with its own exponent and mantissa. In the following, let $x$ be an arbitrary positive, finite, non-zero, non-NaN float. Let $M$ and $E$ be the mantissa width and exponent width of the floating-point type; for f32s, these are 23 and 8, and for f64s they’re 52 and 11.

In the following we assume that $x$ is positive, but you can easily extend these definitions to negative floats by first taking their absolute value.

§raw form

The raw exponent and raw mantissa are the actual bit patterns used to represent the components of $x$. The raw exponent $e_r$ is an integer in $[0, 2^E-2]$ and the raw mantissa $m_r$ is an integer in $[0, 2^M-1]$. Since we are dealing with a nonzero $x$, we forbid $e_r$ and $m_r$ from both being zero. We have $$ x = \begin{cases} 2^{2-2^{E-1}-M}m_r & \text{if} \quad e_r = 0, \\ 2^{e_r-2^{E-1}+1}(2^{-M}m_r+1) & \textrm{otherwise}, \end{cases} $$ $$ e_r = \begin{cases} 0 & \text{if} \quad x < 2^{2-2^{E-1}}, \\ \lfloor \log_2 x \rfloor + 2^{E-1} - 1 & \textrm{otherwise}, \end{cases} $$ $$ m_r = \begin{cases} 2^{M+2^{E-1}-2}x & \text{if} \quad x < 2^{2-2^{E-1}}, \\ 2^M \left ( \frac{x}{2^{\lfloor \log_2 x \rfloor}}-1\right ) & \textrm{otherwise}. \end{cases} $$

§scientific form

We can write $x = 2^{e_s}m_s$, where $e_s$ is an integer and $m_s$ is a rational number with $1 \leq m_s < 2$. If $x$ is a valid float, the scientific mantissa $m_s$ is always exactly representable as a float of the same type. We have $$ x = 2^{e_s}m_s, $$ $$ e_s = \lfloor \log_2 x \rfloor, $$ $$ m_s = \frac{x}{2^{\lfloor \log_2 x \rfloor}}. $$

§integer form

We can also write $x = 2^{e_i}m_i$, where $e_i$ is an integer and $m_i$ is an odd integer. We have $$ x = 2^{e_i}m_i, $$ $e_i$ is the unique integer such that $x/2^{e_i}$ is an odd integer, and $$ m_i = \frac{x}{2^{e_i}}. $$

Required Associated Constants§

source

const WIDTH: u64

The number of bits taken up by the type.

This is $M+E+1$. The three terms in the sum correspond to the width of the mantissa, the width of the exponent, and the sign bit.

  • For f32s, this is 32.
  • For f64s, this is 64.
source

const MANTISSA_WIDTH: u64

The number of bits taken up by the mantissa.

  • For f32s, this is 23.
  • For f64s, this is 52.
source

const MIN_POSITIVE_SUBNORMAL: Self

The smallest positive float. This is $2^{2-2^{E-1}-M}$.

  • For f32s, this is $2^{-149}$, or 1.0e-45.
  • For f64s, this is $2^{-1074}$, or 5.0e-324.
source

const MAX_SUBNORMAL: Self

The largest float in the subnormal range. This is $2^{2-2^{E-1}-M}(2^M-1)$.

  • For f32s, this is $2^{-149}(2^{23}-1)$, or 1.1754942e-38.
  • For f64s, this is $2^{-1074}(2^{52}-1)$, or 2.225073858507201e-308.
source

const MIN_POSITIVE_NORMAL: Self

The smallest positive normal float. This is $2^{2-2^{E-1}}$.

  • For f32s, this is $2^{-126}$, or 1.1754944e-38.
  • For f64s, this is $2^{-1022}$, or 2.2250738585072014e-308.
source

const MAX_FINITE: Self

The largest finite float. This is $2^{2^{E-1}-1}(2-2^{-M})$.

  • For f32s, this is $2^{127}(2-2^{-23})$, or 3.4028235e38.
  • For f64s, this is $2^{1023}(2-2^{-52})$, or 1.7976931348623157e308.
source

const SMALLEST_UNREPRESENTABLE_UINT: u64

The smallest positive integer that cannot be represented as a float. This is $2^{M+1}+1$.

  • For f32s, this is $2^{24}+1$, or 16777217.
  • For f64s, this is $2^{53}+1$, or 9007199254740993.
source

const LARGEST_ORDERED_REPRESENTATION: u64

If you list all floats in increasing order, excluding NaN and giving negative and positive zero separate adjacent spots, this will be the index of the last element, positive infinity. It is $2^{M+1}(2^E-1)+1$.

  • For f32s, this is $2^{32}-2^{24}+1$, or 4278190081.
  • For f64s, this is $2^{64}-2^{53}+1$, or 18437736874454810625.

Provided Associated Constants§

source

const EXPONENT_WIDTH: u64 = _

The number of bits taken up by the exponent.

  • For f32s, this is 8.
  • For f64s, this is 11.
source

const MIN_NORMAL_EXPONENT: i64 = _

The smallest possible exponent of a float in the normal range. Any floats with smaller exponents are subnormal and thus have reduced precision. This is $2-2^{E-1}$.

  • For f32s, this is -126.
  • For f64s, this is -1022.
source

const MIN_EXPONENT: i64 = _

The smallest possible exponent of a float. This is $2-2^{E-1}-M$.

  • For f32s, this is -149.
  • For f64s, this is -1074.
source

const MAX_EXPONENT: i64 = _

The largest possible exponent of a float. This is $2^{E-1}-1$.

  • For f32s, this is 127.
  • For f64s, this is 1023.

Required Methods§

Provided Methods§

source

fn is_negative_zero(self) -> bool

Tests whether self is negative zero.

§Worst-case complexity

Constant time and additional memory.

§Examples
use malachite_base::num::basic::floats::PrimitiveFloat;

assert!((-0.0).is_negative_zero());
assert!(!0.0.is_negative_zero());
assert!(!1.0.is_negative_zero());
assert!(!f32::NAN.is_negative_zero());
assert!(!f32::INFINITY.is_negative_zero());
source

fn abs_negative_zero(self) -> Self

If self is negative zero, returns positive zero; otherwise, returns self.

§Worst-case complexity

Constant time and additional memory.

§Examples
use malachite_base::num::basic::floats::PrimitiveFloat;
use malachite_base::num::float::NiceFloat;

assert_eq!(NiceFloat((-0.0).abs_negative_zero()), NiceFloat(0.0));
assert_eq!(NiceFloat(0.0.abs_negative_zero()), NiceFloat(0.0));
assert_eq!(NiceFloat(1.0.abs_negative_zero()), NiceFloat(1.0));
assert_eq!(NiceFloat((-1.0).abs_negative_zero()), NiceFloat(-1.0));
assert_eq!(NiceFloat(f32::NAN.abs_negative_zero()), NiceFloat(f32::NAN));
source

fn abs_negative_zero_assign(&mut self)

If self is negative zero, replaces it with positive zero; otherwise, leaves self unchanged.

§Worst-case complexity

Constant time and additional memory.

§Examples
use malachite_base::num::basic::floats::PrimitiveFloat;
use malachite_base::num::float::NiceFloat;

let mut f = -0.0;
f.abs_negative_zero_assign();
assert_eq!(NiceFloat(f), NiceFloat(0.0));

let mut f = 0.0;
f.abs_negative_zero_assign();
assert_eq!(NiceFloat(f), NiceFloat(0.0));

let mut f = 1.0;
f.abs_negative_zero_assign();
assert_eq!(NiceFloat(f), NiceFloat(1.0));

let mut f = -1.0;
f.abs_negative_zero_assign();
assert_eq!(NiceFloat(f), NiceFloat(-1.0));

let mut f = f32::NAN;
f.abs_negative_zero_assign();
assert_eq!(NiceFloat(f), NiceFloat(f32::NAN));
source

fn next_higher(self) -> Self

Returns the smallest float larger than self.

Passing -0.0 returns 0.0; passing NaN or positive infinity panics.

§Worst-case complexity

Constant time and additional memory.

§Panics

Panics if self is NaN or positive infinity.

§Examples
use malachite_base::num::basic::floats::PrimitiveFloat;
use malachite_base::num::float::NiceFloat;

assert_eq!(NiceFloat((-0.0f32).next_higher()), NiceFloat(0.0));
assert_eq!(NiceFloat(0.0f32.next_higher()), NiceFloat(1.0e-45));
assert_eq!(NiceFloat(1.0f32.next_higher()), NiceFloat(1.0000001));
assert_eq!(NiceFloat((-1.0f32).next_higher()), NiceFloat(-0.99999994));
source

fn next_lower(self) -> Self

Returns the largest float smaller than self.

Passing 0.0 returns -0.0; passing NaN or negative infinity panics.

§Worst-case complexity

Constant time and additional memory.

§Panics

Panics if self is NaN or negative infinity.

§Examples
use malachite_base::num::basic::floats::PrimitiveFloat;
use malachite_base::num::float::NiceFloat;

assert_eq!(NiceFloat(0.0f32.next_lower()), NiceFloat(-0.0));
assert_eq!(NiceFloat((-0.0f32).next_lower()), NiceFloat(-1.0e-45));
assert_eq!(NiceFloat(1.0f32.next_lower()), NiceFloat(0.99999994));
assert_eq!(NiceFloat((-1.0f32).next_lower()), NiceFloat(-1.0000001));
source

fn to_ordered_representation(self) -> u64

Maps self to an integer. The map preserves ordering, and adjacent floats are mapped to adjacent integers.

Negative infinity is mapped to 0, and positive infinity is mapped to the largest value, LARGEST_ORDERED_REPRESENTATION. Negative and positive zero are mapped to distinct adjacent values. Passing in NaN panics.

The inverse operation is from_ordered_representation.

§Worst-case complexity

Constant time and additional memory.

§Panics

Panics if self is NaN.

§Examples
use malachite_base::num::basic::floats::PrimitiveFloat;
use malachite_base::num::basic::traits::NegativeInfinity;

assert_eq!(f32::NEGATIVE_INFINITY.to_ordered_representation(), 0);
assert_eq!((-0.0f32).to_ordered_representation(), 2139095040);
assert_eq!(0.0f32.to_ordered_representation(), 2139095041);
assert_eq!(1.0f32.to_ordered_representation(), 3204448257);
assert_eq!(f32::INFINITY.to_ordered_representation(), 4278190081);
source

fn from_ordered_representation(n: u64) -> Self

Maps a non-negative integer, less than or equal to LARGEST_ORDERED_REPRESENTATION, to a float. The map preserves ordering, and adjacent integers are mapped to adjacent floats.

Zero is mapped to negative infinity, and LARGEST_ORDERED_REPRESENTATION is mapped to positive infinity. Negative and positive zero are produced by two distinct adjacent integers. NaN is never produced.

The inverse operation is to_ordered_representation.

§Worst-case complexity

Constant time and additional memory.

§Panics

Panics if n is greater than LARGEST_ORDERED_REPRESENTATION.

§Examples
use malachite_base::num::basic::floats::PrimitiveFloat;
use malachite_base::num::basic::traits::NegativeInfinity;

assert_eq!(f32::from_ordered_representation(0), f32::NEGATIVE_INFINITY);
assert_eq!(f32::from_ordered_representation(2139095040), -0.0f32);
assert_eq!(f32::from_ordered_representation(2139095041), 0.0f32);
assert_eq!(f32::from_ordered_representation(3204448257), 1.0f32);
assert_eq!(f32::from_ordered_representation(4278190081), f32::INFINITY);
source

fn precision(self) -> u64

Returns the precision of a nonzero finite floating-point number.

The precision is the number of significant bits of the integer mantissa. For example, the floats with precision 1 are the powers of 2, those with precision 2 are 3 times a power of 2, those with precision 3 are 5 or 7 times a power of 2, and so on.

§Worst-case complexity

Constant time and additional memory.

§Panics

Panics if self is zero, infinite, or NaN.

§Examples
use malachite_base::num::basic::floats::PrimitiveFloat;

assert_eq!(1.0.precision(), 1);
assert_eq!(2.0.precision(), 1);
assert_eq!(3.0.precision(), 2);
assert_eq!(1.5.precision(), 2);
assert_eq!(1.234f32.precision(), 23);
source

fn max_precision_for_sci_exponent(exponent: i64) -> u64

Given a scientific exponent, returns the largest possible precision for a float with that exponent.

See the documentation of the precision function for a definition of precision.

For exponents greater than or equal to MIN_NORMAL_EXPONENT, the maximum precision is one more than the mantissa width. For smaller exponents (corresponding to the subnormal range), the precision is lower.

§Worst-case complexity

Constant time and additional memory.

§Panics

Panics if exponent is less than MIN_EXPONENT or greater than MAX_EXPONENT.

§Examples
use malachite_base::num::basic::floats::PrimitiveFloat;

assert_eq!(f32::max_precision_for_sci_exponent(0), 24);
assert_eq!(f32::max_precision_for_sci_exponent(127), 24);
assert_eq!(f32::max_precision_for_sci_exponent(-149), 1);
assert_eq!(f32::max_precision_for_sci_exponent(-148), 2);
assert_eq!(f32::max_precision_for_sci_exponent(-147), 3);

Object Safety§

This trait is not object safe.

Implementations on Foreign Types§

source§

impl PrimitiveFloat for f32

source§

const WIDTH: u64 = 32u64

source§

const MANTISSA_WIDTH: u64 = 23u64

source§

const MAX_FINITE: Self = 3.40282347E+38f32

source§

const MIN_POSITIVE_SUBNORMAL: Self = 1.40129846E-45f32

source§

const MAX_SUBNORMAL: Self = 1.17549421E-38f32

source§

const MIN_POSITIVE_NORMAL: Self = 1.17549435E-38f32

source§

const SMALLEST_UNREPRESENTABLE_UINT: u64 = 16_777_217u64

source§

const LARGEST_ORDERED_REPRESENTATION: u64 = 4_278_190_081u64

source§

fn is_nan(self) -> bool

source§

fn is_infinite(self) -> bool

source§

fn is_finite(self) -> bool

source§

fn is_normal(self) -> bool

source§

fn is_sign_positive(self) -> bool

source§

fn is_sign_negative(self) -> bool

source§

fn classify(self) -> FpCategory

source§

fn to_bits(self) -> u64

source§

fn from_bits(v: u64) -> f32

source§

impl PrimitiveFloat for f64

source§

const WIDTH: u64 = 64u64

source§

const MANTISSA_WIDTH: u64 = 52u64

source§

const MAX_FINITE: Self = 1.7976931348623157E+308f64

source§

const MIN_POSITIVE_SUBNORMAL: Self = 4.9406564584124654E-324f64

source§

const MAX_SUBNORMAL: Self = 2.2250738585072009E-308f64

source§

const MIN_POSITIVE_NORMAL: Self = 2.2250738585072014E-308f64

source§

const SMALLEST_UNREPRESENTABLE_UINT: u64 = 9_007_199_254_740_993u64

source§

const LARGEST_ORDERED_REPRESENTATION: u64 = 18_437_736_874_454_810_625u64

source§

fn is_nan(self) -> bool

source§

fn is_infinite(self) -> bool

source§

fn is_finite(self) -> bool

source§

fn is_normal(self) -> bool

source§

fn is_sign_positive(self) -> bool

source§

fn is_sign_negative(self) -> bool

source§

fn classify(self) -> FpCategory

source§

fn to_bits(self) -> u64

source§

fn from_bits(v: u64) -> f64

Implementors§