pub fn norm_l2_impl<T: AsPrimitive<Output>, Output: Float + Sum + 'static + AddAssign, const LANES: usize>( vector: &[T], ) -> Output
NOTE: This function is `pub` only for benchmarking purposes.