pub fn mul_block<NQ>(src1: *mut f32, src2: *mut f32, dst: *mut f32, nquads: NQ) where NQ: TryInto<u32>, <NQ as TryInto<u32>>::Error: Debug,