1use arrayref::{array_mut_ref, array_ref};
17use core::mem::size_of;
18use paste::paste;
19
/// Permutation of the eight row groups of a 1024-element block.
///
/// The `index` helpers inside `pack!` and `unpack!` split a row into
/// `o = row / 8` and `s = row % 8` and place the element at
/// `FL_ORDER[o] * 16 + s * 128 + lane`, so this table decides where each
/// group of eight rows lands in the unpacked block.
pub const FL_ORDER: [usize; 8] = [0, 4, 2, 6, 1, 5, 3, 7];
21
/// Integer types that can be laid out as lanes of a 1024-bit word.
pub trait FastLanes: Sized + Copy {
    /// Width of `Self` in bits.
    const T: usize = 8 * size_of::<Self>();
    /// How many `Self`-sized lanes fit into 1024 bits.
    const LANES: usize = 1024 / Self::T;
}

// The provided defaults are correct for every unsigned primitive below.
impl FastLanes for u8 {}
impl FastLanes for u16 {}
impl FastLanes for u32 {}
impl FastLanes for u64 {}
32
/// Emits the unrolled code that packs one lane of a 1024-element block.
///
/// Arguments:
/// * `$T` - element type; its bit width is read from `<$T>::T`.
/// * `$W` - number of bits kept per element.
/// * `$packed` - indexable destination; packed word `k` of the lane is stored
///   at `<$T>::LANES * k + $lane`.
/// * `$lane` - lane being processed.
/// * `| $_1 $idx | body` - a closure-like kernel: `body` is evaluated with
///   `$idx` bound to the unpacked element index and must yield the value to
///   pack. `$_1` is spliced in front of `$idx` when the inner `__kernel__`
///   macro is declared; callers are presumably expected to pass a literal `$`
///   there so that it forms a metavariable (confirm at the call sites).
macro_rules! pack {
    ($T:ty, $W:expr, $packed:expr, $lane:expr, | $_1:tt $idx:ident | $($body:tt)*) => {
        // Turn the caller's `|idx| body` into a local macro that can be invoked per row.
        macro_rules! __kernel__ {( $_1 $idx:ident ) => ( $($body)* )}
        {
            use paste::paste;

            // Bit width of the element type.
            const T: usize = <$T>::T;

            // Position of (`row`, `lane`) in the unpacked block: rows are taken in
            // groups of eight, and the group number is permuted through `FL_ORDER`.
            #[inline(always)]
            fn index(row: usize, lane: usize) -> usize {
                let o = row / 8;
                let s = row % 8;
                (FL_ORDER[o] * 16) + (s * 128) + lane
            }

            if $W == 0 {
                // Zero-width elements take no packed space: nothing is written.
            } else if $W == T {
                // Full-width elements are copied through unchanged, one word per row.
                paste!(seq_t!(row in $T {
                    let idx = index(row, $lane);
                    $packed[<$T>::LANES * row + $lane] = __kernel__!(idx);
                }));
            } else {
                // Low `$W` bits set.
                let mask: $T = (1 << $W) - 1;

                // Accumulator for the packed word currently being assembled.
                let mut tmp: $T = 0;

                // `seq_t!` repeats this body once per row (`row` in `0..T`).
                paste!(seq_t!(row in $T {
                    let idx = index(row, $lane);
                    let src = __kernel__!(idx);
                    let src = src & mask;

                    // Place the value at bit offset `(row * W) % T` of the current word.
                    if row == 0 {
                        tmp = src;
                    } else {
                        tmp |= src << (row * $W) % T;
                    }

                    // Packed word that holds the first bit of this row and of the next row.
                    let curr_word: usize = (row * $W) / T;
                    let next_word: usize = ((row + 1) * $W) / T;

                    // Presumably needed because the carry stored after the final row is
                    // never read again.
                    #[allow(unused_assignments)]
                    if next_word > curr_word {
                        // The current word is complete: store it.
                        $packed[<$T>::LANES * curr_word + $lane] = tmp;
                        let remaining_bits: usize = ((row + 1) * $W) % T;
                        // Carry the top `remaining_bits` bits of this value, which did not
                        // fit, into the start of the next word.
                        tmp = src >> $W - remaining_bits;
                    }
                }));
            }
        }
    };
}
96
/// Emits the unrolled code that unpacks one lane of a 1024-element block.
///
/// Arguments:
/// * `$T` - element type; its bit width is read from `<$T>::T`.
/// * `$W` - number of bits stored per element.
/// * `$packed` - indexable source; packed word `k` of the lane is read from
///   `<$T>::LANES * k + $lane`.
/// * `$lane` - lane being processed.
/// * `| $_1 $idx, $_2 $elem | body` - a closure-like kernel: `body` is run once
///   per row with `$idx` bound to the unpacked element index and `$elem` to the
///   decoded value. `$_1`/`$_2` are spliced in front of the identifiers when the
///   inner `__kernel__` macro is declared; callers are presumably expected to
///   pass a literal `$` for each (confirm at the call sites).
macro_rules! unpack {
    ($T:ty, $W:expr, $packed:expr, $lane:expr, | $_1:tt $idx:ident, $_2:tt $elem:ident | $($body:tt)*) => {
        // Turn the caller's `|idx, elem| body` into a local macro invoked per row.
        macro_rules! __kernel__ {( $_1 $idx:ident, $_2 $elem:ident ) => ( $($body)* )}
        {
            use paste::paste;

            // Bit width of the element type.
            const T: usize = <$T>::T;

            // Position of (`row`, `lane`) in the unpacked block: rows are taken in
            // groups of eight, and the group number is permuted through `FL_ORDER`.
            #[inline(always)]
            fn index(row: usize, lane: usize) -> usize {
                let o = row / 8;
                let s = row % 8;
                (FL_ORDER[o] * 16) + (s * 128) + lane
            }

            if $W == 0 {
                // Nothing is stored for zero-width elements: every row decodes to 0.
                paste!(seq_t!(row in $T {
                    let idx = index(row, $lane);
                    let zero: $T = 0;
                    __kernel__!(idx, zero);
                }));
            } else if $W == T {
                // Full-width elements are passed through unchanged, one word per row.
                paste!(seq_t!(row in $T {
                    let idx = index(row, $lane);
                    let src = $packed[<$T>::LANES * row + $lane];
                    __kernel__!(idx, src);
                }));
            } else {
                // Mask with the low `width` bits set; `width == T` is special-cased
                // because `1 << T` would overflow the element type.
                #[inline]
                fn mask(width: usize) -> $T {
                    if width == T { <$T>::MAX } else { (1 << (width % T)) - 1 }
                }

                // Start from the first packed word of this lane.
                let mut src: $T = $packed[$lane];
                let mut tmp: $T;

                // `seq_t!` repeats this body once per row (`row` in `0..T`).
                paste!(seq_t!(row in $T {
                    // Packed word that holds the first bit of this row and of the next row.
                    let curr_word: usize = (row * $W) / T;
                    let next_word = ((row + 1) * $W) / T;

                    // Bit offset of this row's value inside the current word.
                    let shift = (row * $W) % T;

                    if next_word > curr_word {
                        // The value ends at, or straddles, the end of the current word.
                        let remaining_bits = ((row + 1) * $W) % T;
                        // Bits of the value that live in the current word.
                        let current_bits = $W - remaining_bits;
                        tmp = (src >> shift) & mask(current_bits);

                        // A lane spans `$W` packed words (T rows of `$W` bits each), so
                        // only load the next word while one exists.
                        if next_word < $W {
                            src = $packed[<$T>::LANES * next_word + $lane];
                            // Append the high part of the value taken from the next word.
                            tmp |= (src & mask(remaining_bits)) << current_bits;
                        }
                    } else {
                        // The value lies entirely inside the current word.
                        tmp = (src >> shift) & mask($W);
                    }

                    let idx = index(row, $lane);
                    // Hand the decoded value to the caller's kernel.
                    __kernel__!(idx, tmp);
                }));
            }
        }
    };
}
170
// Forwards to `seq_macro::seq!` with a range of `0..BITS`, where `BITS` is the
// bit width of the named unsigned integer type.
macro_rules! seq_t {
    ($var:ident in u8 $block:tt) => {
        seq_macro::seq!($var in 0..8 $block)
    };
    ($var:ident in u16 $block:tt) => {
        seq_macro::seq!($var in 0..16 $block)
    };
    ($var:ident in u32 $block:tt) => {
        seq_macro::seq!($var in 0..32 $block)
    };
    ($var:ident in u64 $block:tt) => {
        seq_macro::seq!($var in 0..64 $block)
    };
}
178
/// Bit-packing of 1024-element blocks where the packed width is chosen at
/// runtime.
pub trait BitPacking: FastLanes {
    /// Packs the elements of `input` into `output`, keeping `width` bits of
    /// each element.
    ///
    /// # Safety
    /// The implementations in this file expect `input` to hold 1024 elements,
    /// `output` to hold `1024 * width / Self::T` elements and `width` to be at
    /// most `Self::T`. They verify this with `debug_assert!` only.
    unsafe fn unchecked_pack(width: usize, input: &[Self], output: &mut [Self]);

    /// Unpacks `input`, which stores `width` bits per element, into `output`.
    ///
    /// # Safety
    /// The implementations in this file expect `input` to hold
    /// `1024 * width / Self::T` elements, `output` to hold 1024 elements and
    /// `width` to be at most `Self::T`. They verify this with `debug_assert!`
    /// only.
    unsafe fn unchecked_unpack(width: usize, input: &[Self], output: &mut [Self]);
}
199
200impl BitPacking for u8 {
201 unsafe fn unchecked_pack(width: usize, input: &[Self], output: &mut [Self]) {
202 let packed_len = 128 * width / size_of::<Self>();
203 debug_assert_eq!(
204 output.len(),
205 packed_len,
206 "Output buffer must be of size 1024 * W / T"
207 );
208 debug_assert_eq!(input.len(), 1024, "Input buffer must be of size 1024");
209 debug_assert!(
210 width <= Self::T,
211 "Width must be less than or equal to {}",
212 Self::T
213 );
214
215 match width {
216 1 => pack_8_1(
217 array_ref![input, 0, 1024],
218 array_mut_ref![output, 0, 1024 / 8],
219 ),
220 2 => pack_8_2(
221 array_ref![input, 0, 1024],
222 array_mut_ref![output, 0, 1024 * 2 / 8],
223 ),
224 3 => pack_8_3(
225 array_ref![input, 0, 1024],
226 array_mut_ref![output, 0, 1024 * 3 / 8],
227 ),
228 4 => pack_8_4(
229 array_ref![input, 0, 1024],
230 array_mut_ref![output, 0, 1024 * 4 / 8],
231 ),
232 5 => pack_8_5(
233 array_ref![input, 0, 1024],
234 array_mut_ref![output, 0, 1024 * 5 / 8],
235 ),
236 6 => pack_8_6(
237 array_ref![input, 0, 1024],
238 array_mut_ref![output, 0, 1024 * 6 / 8],
239 ),
240 7 => pack_8_7(
241 array_ref![input, 0, 1024],
242 array_mut_ref![output, 0, 1024 * 7 / 8],
243 ),
244 8 => pack_8_8(
245 array_ref![input, 0, 1024],
246 array_mut_ref![output, 0, 1024 * 8 / 8],
247 ),
248
249 _ => unreachable!("Unsupported width: {}", width),
250 }
251 }
252
253 unsafe fn unchecked_unpack(width: usize, input: &[Self], output: &mut [Self]) {
254 let packed_len = 128 * width / size_of::<Self>();
255 debug_assert_eq!(
256 input.len(),
257 packed_len,
258 "Input buffer must be of size 1024 * W / T"
259 );
260 debug_assert_eq!(output.len(), 1024, "Output buffer must be of size 1024");
261 debug_assert!(
262 width <= Self::T,
263 "Width must be less than or equal to {}",
264 Self::T
265 );
266
267 match width {
268 1 => unpack_8_1(
269 array_ref![input, 0, 1024 / 8],
270 array_mut_ref![output, 0, 1024],
271 ),
272 2 => unpack_8_2(
273 array_ref![input, 0, 1024 * 2 / 8],
274 array_mut_ref![output, 0, 1024],
275 ),
276 3 => unpack_8_3(
277 array_ref![input, 0, 1024 * 3 / 8],
278 array_mut_ref![output, 0, 1024],
279 ),
280 4 => unpack_8_4(
281 array_ref![input, 0, 1024 * 4 / 8],
282 array_mut_ref![output, 0, 1024],
283 ),
284 5 => unpack_8_5(
285 array_ref![input, 0, 1024 * 5 / 8],
286 array_mut_ref![output, 0, 1024],
287 ),
288 6 => unpack_8_6(
289 array_ref![input, 0, 1024 * 6 / 8],
290 array_mut_ref![output, 0, 1024],
291 ),
292 7 => unpack_8_7(
293 array_ref![input, 0, 1024 * 7 / 8],
294 array_mut_ref![output, 0, 1024],
295 ),
296 8 => unpack_8_8(
297 array_ref![input, 0, 1024 * 8 / 8],
298 array_mut_ref![output, 0, 1024],
299 ),
300
301 _ => unreachable!("Unsupported width: {}", width),
302 }
303 }
304}
305
306impl BitPacking for u16 {
307 unsafe fn unchecked_pack(width: usize, input: &[Self], output: &mut [Self]) {
308 let packed_len = 128 * width / size_of::<Self>();
309 debug_assert_eq!(
310 output.len(),
311 packed_len,
312 "Output buffer must be of size 1024 * W / T"
313 );
314 debug_assert_eq!(input.len(), 1024, "Input buffer must be of size 1024");
315 debug_assert!(
316 width <= Self::T,
317 "Width must be less than or equal to {}",
318 Self::T
319 );
320
321 match width {
322 1 => pack_16_1(
323 array_ref![input, 0, 1024],
324 array_mut_ref![output, 0, 1024 / 16],
325 ),
326 2 => pack_16_2(
327 array_ref![input, 0, 1024],
328 array_mut_ref![output, 0, 1024 * 2 / 16],
329 ),
330 3 => pack_16_3(
331 array_ref![input, 0, 1024],
332 array_mut_ref![output, 0, 1024 * 3 / 16],
333 ),
334 4 => pack_16_4(
335 array_ref![input, 0, 1024],
336 array_mut_ref![output, 0, 1024 * 4 / 16],
337 ),
338 5 => pack_16_5(
339 array_ref![input, 0, 1024],
340 array_mut_ref![output, 0, 1024 * 5 / 16],
341 ),
342 6 => pack_16_6(
343 array_ref![input, 0, 1024],
344 array_mut_ref![output, 0, 1024 * 6 / 16],
345 ),
346 7 => pack_16_7(
347 array_ref![input, 0, 1024],
348 array_mut_ref![output, 0, 1024 * 7 / 16],
349 ),
350 8 => pack_16_8(
351 array_ref![input, 0, 1024],
352 array_mut_ref![output, 0, 1024 * 8 / 16],
353 ),
354 9 => pack_16_9(
355 array_ref![input, 0, 1024],
356 array_mut_ref![output, 0, 1024 * 9 / 16],
357 ),
358
359 10 => pack_16_10(
360 array_ref![input, 0, 1024],
361 array_mut_ref![output, 0, 1024 * 10 / 16],
362 ),
363 11 => pack_16_11(
364 array_ref![input, 0, 1024],
365 array_mut_ref![output, 0, 1024 * 11 / 16],
366 ),
367 12 => pack_16_12(
368 array_ref![input, 0, 1024],
369 array_mut_ref![output, 0, 1024 * 12 / 16],
370 ),
371 13 => pack_16_13(
372 array_ref![input, 0, 1024],
373 array_mut_ref![output, 0, 1024 * 13 / 16],
374 ),
375 14 => pack_16_14(
376 array_ref![input, 0, 1024],
377 array_mut_ref![output, 0, 1024 * 14 / 16],
378 ),
379 15 => pack_16_15(
380 array_ref![input, 0, 1024],
381 array_mut_ref![output, 0, 1024 * 15 / 16],
382 ),
383 16 => pack_16_16(
384 array_ref![input, 0, 1024],
385 array_mut_ref![output, 0, 1024 * 16 / 16],
386 ),
387
388 _ => unreachable!("Unsupported width: {}", width),
389 }
390 }
391
392 unsafe fn unchecked_unpack(width: usize, input: &[Self], output: &mut [Self]) {
393 let packed_len = 128 * width / size_of::<Self>();
394 debug_assert_eq!(
395 input.len(),
396 packed_len,
397 "Input buffer must be of size 1024 * W / T"
398 );
399 debug_assert_eq!(output.len(), 1024, "Output buffer must be of size 1024");
400 debug_assert!(
401 width <= Self::T,
402 "Width must be less than or equal to {}",
403 Self::T
404 );
405
406 match width {
407 1 => unpack_16_1(
408 array_ref![input, 0, 1024 / 16],
409 array_mut_ref![output, 0, 1024],
410 ),
411 2 => unpack_16_2(
412 array_ref![input, 0, 1024 * 2 / 16],
413 array_mut_ref![output, 0, 1024],
414 ),
415 3 => unpack_16_3(
416 array_ref![input, 0, 1024 * 3 / 16],
417 array_mut_ref![output, 0, 1024],
418 ),
419 4 => unpack_16_4(
420 array_ref![input, 0, 1024 * 4 / 16],
421 array_mut_ref![output, 0, 1024],
422 ),
423 5 => unpack_16_5(
424 array_ref![input, 0, 1024 * 5 / 16],
425 array_mut_ref![output, 0, 1024],
426 ),
427 6 => unpack_16_6(
428 array_ref![input, 0, 1024 * 6 / 16],
429 array_mut_ref![output, 0, 1024],
430 ),
431 7 => unpack_16_7(
432 array_ref![input, 0, 1024 * 7 / 16],
433 array_mut_ref![output, 0, 1024],
434 ),
435 8 => unpack_16_8(
436 array_ref![input, 0, 1024 * 8 / 16],
437 array_mut_ref![output, 0, 1024],
438 ),
439 9 => unpack_16_9(
440 array_ref![input, 0, 1024 * 9 / 16],
441 array_mut_ref![output, 0, 1024],
442 ),
443
444 10 => unpack_16_10(
445 array_ref![input, 0, 1024 * 10 / 16],
446 array_mut_ref![output, 0, 1024],
447 ),
448 11 => unpack_16_11(
449 array_ref![input, 0, 1024 * 11 / 16],
450 array_mut_ref![output, 0, 1024],
451 ),
452 12 => unpack_16_12(
453 array_ref![input, 0, 1024 * 12 / 16],
454 array_mut_ref![output, 0, 1024],
455 ),
456 13 => unpack_16_13(
457 array_ref![input, 0, 1024 * 13 / 16],
458 array_mut_ref![output, 0, 1024],
459 ),
460 14 => unpack_16_14(
461 array_ref![input, 0, 1024 * 14 / 16],
462 array_mut_ref![output, 0, 1024],
463 ),
464 15 => unpack_16_15(
465 array_ref![input, 0, 1024 * 15 / 16],
466 array_mut_ref![output, 0, 1024],
467 ),
468 16 => unpack_16_16(
469 array_ref![input, 0, 1024 * 16 / 16],
470 array_mut_ref![output, 0, 1024],
471 ),
472
473 _ => unreachable!("Unsupported width: {}", width),
474 }
475 }
476}
477
478impl BitPacking for u32 {
479 unsafe fn unchecked_pack(width: usize, input: &[Self], output: &mut [Self]) {
480 let packed_len = 128 * width / size_of::<Self>();
481 debug_assert_eq!(
482 output.len(),
483 packed_len,
484 "Output buffer must be of size 1024 * W / T"
485 );
486 debug_assert_eq!(input.len(), 1024, "Input buffer must be of size 1024");
487 debug_assert!(
488 width <= Self::T,
489 "Width must be less than or equal to {}",
490 Self::T
491 );
492
493 match width {
494 1 => pack_32_1(
495 array_ref![input, 0, 1024],
496 array_mut_ref![output, 0, 1024 / 32],
497 ),
498 2 => pack_32_2(
499 array_ref![input, 0, 1024],
500 array_mut_ref![output, 0, 1024 * 2 / 32],
501 ),
502 3 => pack_32_3(
503 array_ref![input, 0, 1024],
504 array_mut_ref![output, 0, 1024 * 3 / 32],
505 ),
506 4 => pack_32_4(
507 array_ref![input, 0, 1024],
508 array_mut_ref![output, 0, 1024 * 4 / 32],
509 ),
510 5 => pack_32_5(
511 array_ref![input, 0, 1024],
512 array_mut_ref![output, 0, 1024 * 5 / 32],
513 ),
514 6 => pack_32_6(
515 array_ref![input, 0, 1024],
516 array_mut_ref![output, 0, 1024 * 6 / 32],
517 ),
518 7 => pack_32_7(
519 array_ref![input, 0, 1024],
520 array_mut_ref![output, 0, 1024 * 7 / 32],
521 ),
522 8 => pack_32_8(
523 array_ref![input, 0, 1024],
524 array_mut_ref![output, 0, 1024 * 8 / 32],
525 ),
526 9 => pack_32_9(
527 array_ref![input, 0, 1024],
528 array_mut_ref![output, 0, 1024 * 9 / 32],
529 ),
530
531 10 => pack_32_10(
532 array_ref![input, 0, 1024],
533 array_mut_ref![output, 0, 1024 * 10 / 32],
534 ),
535 11 => pack_32_11(
536 array_ref![input, 0, 1024],
537 array_mut_ref![output, 0, 1024 * 11 / 32],
538 ),
539 12 => pack_32_12(
540 array_ref![input, 0, 1024],
541 array_mut_ref![output, 0, 1024 * 12 / 32],
542 ),
543 13 => pack_32_13(
544 array_ref![input, 0, 1024],
545 array_mut_ref![output, 0, 1024 * 13 / 32],
546 ),
547 14 => pack_32_14(
548 array_ref![input, 0, 1024],
549 array_mut_ref![output, 0, 1024 * 14 / 32],
550 ),
551 15 => pack_32_15(
552 array_ref![input, 0, 1024],
553 array_mut_ref![output, 0, 1024 * 15 / 32],
554 ),
555 16 => pack_32_16(
556 array_ref![input, 0, 1024],
557 array_mut_ref![output, 0, 1024 * 16 / 32],
558 ),
559 17 => pack_32_17(
560 array_ref![input, 0, 1024],
561 array_mut_ref![output, 0, 1024 * 17 / 32],
562 ),
563 18 => pack_32_18(
564 array_ref![input, 0, 1024],
565 array_mut_ref![output, 0, 1024 * 18 / 32],
566 ),
567 19 => pack_32_19(
568 array_ref![input, 0, 1024],
569 array_mut_ref![output, 0, 1024 * 19 / 32],
570 ),
571
572 20 => pack_32_20(
573 array_ref![input, 0, 1024],
574 array_mut_ref![output, 0, 1024 * 20 / 32],
575 ),
576 21 => pack_32_21(
577 array_ref![input, 0, 1024],
578 array_mut_ref![output, 0, 1024 * 21 / 32],
579 ),
580 22 => pack_32_22(
581 array_ref![input, 0, 1024],
582 array_mut_ref![output, 0, 1024 * 22 / 32],
583 ),
584 23 => pack_32_23(
585 array_ref![input, 0, 1024],
586 array_mut_ref![output, 0, 1024 * 23 / 32],
587 ),
588 24 => pack_32_24(
589 array_ref![input, 0, 1024],
590 array_mut_ref![output, 0, 1024 * 24 / 32],
591 ),
592 25 => pack_32_25(
593 array_ref![input, 0, 1024],
594 array_mut_ref![output, 0, 1024 * 25 / 32],
595 ),
596 26 => pack_32_26(
597 array_ref![input, 0, 1024],
598 array_mut_ref![output, 0, 1024 * 26 / 32],
599 ),
600 27 => pack_32_27(
601 array_ref![input, 0, 1024],
602 array_mut_ref![output, 0, 1024 * 27 / 32],
603 ),
604 28 => pack_32_28(
605 array_ref![input, 0, 1024],
606 array_mut_ref![output, 0, 1024 * 28 / 32],
607 ),
608 29 => pack_32_29(
609 array_ref![input, 0, 1024],
610 array_mut_ref![output, 0, 1024 * 29 / 32],
611 ),
612
613 30 => pack_32_30(
614 array_ref![input, 0, 1024],
615 array_mut_ref![output, 0, 1024 * 30 / 32],
616 ),
617 31 => pack_32_31(
618 array_ref![input, 0, 1024],
619 array_mut_ref![output, 0, 1024 * 31 / 32],
620 ),
621 32 => pack_32_32(
622 array_ref![input, 0, 1024],
623 array_mut_ref![output, 0, 1024 * 32 / 32],
624 ),
625
626 _ => unreachable!("Unsupported width: {}", width),
627 }
628 }
629
630 unsafe fn unchecked_unpack(width: usize, input: &[Self], output: &mut [Self]) {
631 let packed_len = 128 * width / size_of::<Self>();
632 debug_assert_eq!(
633 input.len(),
634 packed_len,
635 "Input buffer must be of size 1024 * W / T"
636 );
637 debug_assert_eq!(output.len(), 1024, "Output buffer must be of size 1024");
638 debug_assert!(
639 width <= Self::T,
640 "Width must be less than or equal to {}",
641 Self::T
642 );
643
644 match width {
645 1 => unpack_32_1(
646 array_ref![input, 0, 1024 / 32],
647 array_mut_ref![output, 0, 1024],
648 ),
649 2 => unpack_32_2(
650 array_ref![input, 0, 1024 * 2 / 32],
651 array_mut_ref![output, 0, 1024],
652 ),
653 3 => unpack_32_3(
654 array_ref![input, 0, 1024 * 3 / 32],
655 array_mut_ref![output, 0, 1024],
656 ),
657 4 => unpack_32_4(
658 array_ref![input, 0, 1024 * 4 / 32],
659 array_mut_ref![output, 0, 1024],
660 ),
661 5 => unpack_32_5(
662 array_ref![input, 0, 1024 * 5 / 32],
663 array_mut_ref![output, 0, 1024],
664 ),
665 6 => unpack_32_6(
666 array_ref![input, 0, 1024 * 6 / 32],
667 array_mut_ref![output, 0, 1024],
668 ),
669 7 => unpack_32_7(
670 array_ref![input, 0, 1024 * 7 / 32],
671 array_mut_ref![output, 0, 1024],
672 ),
673 8 => unpack_32_8(
674 array_ref![input, 0, 1024 * 8 / 32],
675 array_mut_ref![output, 0, 1024],
676 ),
677 9 => unpack_32_9(
678 array_ref![input, 0, 1024 * 9 / 32],
679 array_mut_ref![output, 0, 1024],
680 ),
681
682 10 => unpack_32_10(
683 array_ref![input, 0, 1024 * 10 / 32],
684 array_mut_ref![output, 0, 1024],
685 ),
686 11 => unpack_32_11(
687 array_ref![input, 0, 1024 * 11 / 32],
688 array_mut_ref![output, 0, 1024],
689 ),
690 12 => unpack_32_12(
691 array_ref![input, 0, 1024 * 12 / 32],
692 array_mut_ref![output, 0, 1024],
693 ),
694 13 => unpack_32_13(
695 array_ref![input, 0, 1024 * 13 / 32],
696 array_mut_ref![output, 0, 1024],
697 ),
698 14 => unpack_32_14(
699 array_ref![input, 0, 1024 * 14 / 32],
700 array_mut_ref![output, 0, 1024],
701 ),
702 15 => unpack_32_15(
703 array_ref![input, 0, 1024 * 15 / 32],
704 array_mut_ref![output, 0, 1024],
705 ),
706 16 => unpack_32_16(
707 array_ref![input, 0, 1024 * 16 / 32],
708 array_mut_ref![output, 0, 1024],
709 ),
710 17 => unpack_32_17(
711 array_ref![input, 0, 1024 * 17 / 32],
712 array_mut_ref![output, 0, 1024],
713 ),
714 18 => unpack_32_18(
715 array_ref![input, 0, 1024 * 18 / 32],
716 array_mut_ref![output, 0, 1024],
717 ),
718 19 => unpack_32_19(
719 array_ref![input, 0, 1024 * 19 / 32],
720 array_mut_ref![output, 0, 1024],
721 ),
722
723 20 => unpack_32_20(
724 array_ref![input, 0, 1024 * 20 / 32],
725 array_mut_ref![output, 0, 1024],
726 ),
727 21 => unpack_32_21(
728 array_ref![input, 0, 1024 * 21 / 32],
729 array_mut_ref![output, 0, 1024],
730 ),
731 22 => unpack_32_22(
732 array_ref![input, 0, 1024 * 22 / 32],
733 array_mut_ref![output, 0, 1024],
734 ),
735 23 => unpack_32_23(
736 array_ref![input, 0, 1024 * 23 / 32],
737 array_mut_ref![output, 0, 1024],
738 ),
739 24 => unpack_32_24(
740 array_ref![input, 0, 1024 * 24 / 32],
741 array_mut_ref![output, 0, 1024],
742 ),
743 25 => unpack_32_25(
744 array_ref![input, 0, 1024 * 25 / 32],
745 array_mut_ref![output, 0, 1024],
746 ),
747 26 => unpack_32_26(
748 array_ref![input, 0, 1024 * 26 / 32],
749 array_mut_ref![output, 0, 1024],
750 ),
751 27 => unpack_32_27(
752 array_ref![input, 0, 1024 * 27 / 32],
753 array_mut_ref![output, 0, 1024],
754 ),
755 28 => unpack_32_28(
756 array_ref![input, 0, 1024 * 28 / 32],
757 array_mut_ref![output, 0, 1024],
758 ),
759 29 => unpack_32_29(
760 array_ref![input, 0, 1024 * 29 / 32],
761 array_mut_ref![output, 0, 1024],
762 ),
763
764 30 => unpack_32_30(
765 array_ref![input, 0, 1024 * 30 / 32],
766 array_mut_ref![output, 0, 1024],
767 ),
768 31 => unpack_32_31(
769 array_ref![input, 0, 1024 * 31 / 32],
770 array_mut_ref![output, 0, 1024],
771 ),
772 32 => unpack_32_32(
773 array_ref![input, 0, 1024 * 32 / 32],
774 array_mut_ref![output, 0, 1024],
775 ),
776
777 _ => unreachable!("Unsupported width: {}", width),
778 }
779 }
780}
781
782impl BitPacking for u64 {
783 unsafe fn unchecked_pack(width: usize, input: &[Self], output: &mut [Self]) {
784 let packed_len = 128 * width / size_of::<Self>();
785 debug_assert_eq!(
786 output.len(),
787 packed_len,
788 "Output buffer must be of size 1024 * W / T"
789 );
790 debug_assert_eq!(input.len(), 1024, "Input buffer must be of size 1024");
791 debug_assert!(
792 width <= Self::T,
793 "Width must be less than or equal to {}",
794 Self::T
795 );
796
797 match width {
798 1 => pack_64_1(
799 array_ref![input, 0, 1024],
800 array_mut_ref![output, 0, 1024 / 64],
801 ),
802 2 => pack_64_2(
803 array_ref![input, 0, 1024],
804 array_mut_ref![output, 0, 1024 * 2 / 64],
805 ),
806 3 => pack_64_3(
807 array_ref![input, 0, 1024],
808 array_mut_ref![output, 0, 1024 * 3 / 64],
809 ),
810 4 => pack_64_4(
811 array_ref![input, 0, 1024],
812 array_mut_ref![output, 0, 1024 * 4 / 64],
813 ),
814 5 => pack_64_5(
815 array_ref![input, 0, 1024],
816 array_mut_ref![output, 0, 1024 * 5 / 64],
817 ),
818 6 => pack_64_6(
819 array_ref![input, 0, 1024],
820 array_mut_ref![output, 0, 1024 * 6 / 64],
821 ),
822 7 => pack_64_7(
823 array_ref![input, 0, 1024],
824 array_mut_ref![output, 0, 1024 * 7 / 64],
825 ),
826 8 => pack_64_8(
827 array_ref![input, 0, 1024],
828 array_mut_ref![output, 0, 1024 * 8 / 64],
829 ),
830 9 => pack_64_9(
831 array_ref![input, 0, 1024],
832 array_mut_ref![output, 0, 1024 * 9 / 64],
833 ),
834
835 10 => pack_64_10(
836 array_ref![input, 0, 1024],
837 array_mut_ref![output, 0, 1024 * 10 / 64],
838 ),
839 11 => pack_64_11(
840 array_ref![input, 0, 1024],
841 array_mut_ref![output, 0, 1024 * 11 / 64],
842 ),
843 12 => pack_64_12(
844 array_ref![input, 0, 1024],
845 array_mut_ref![output, 0, 1024 * 12 / 64],
846 ),
847 13 => pack_64_13(
848 array_ref![input, 0, 1024],
849 array_mut_ref![output, 0, 1024 * 13 / 64],
850 ),
851 14 => pack_64_14(
852 array_ref![input, 0, 1024],
853 array_mut_ref![output, 0, 1024 * 14 / 64],
854 ),
855 15 => pack_64_15(
856 array_ref![input, 0, 1024],
857 array_mut_ref![output, 0, 1024 * 15 / 64],
858 ),
859 16 => pack_64_16(
860 array_ref![input, 0, 1024],
861 array_mut_ref![output, 0, 1024 * 16 / 64],
862 ),
863 17 => pack_64_17(
864 array_ref![input, 0, 1024],
865 array_mut_ref![output, 0, 1024 * 17 / 64],
866 ),
867 18 => pack_64_18(
868 array_ref![input, 0, 1024],
869 array_mut_ref![output, 0, 1024 * 18 / 64],
870 ),
871 19 => pack_64_19(
872 array_ref![input, 0, 1024],
873 array_mut_ref![output, 0, 1024 * 19 / 64],
874 ),
875
876 20 => pack_64_20(
877 array_ref![input, 0, 1024],
878 array_mut_ref![output, 0, 1024 * 20 / 64],
879 ),
880 21 => pack_64_21(
881 array_ref![input, 0, 1024],
882 array_mut_ref![output, 0, 1024 * 21 / 64],
883 ),
884 22 => pack_64_22(
885 array_ref![input, 0, 1024],
886 array_mut_ref![output, 0, 1024 * 22 / 64],
887 ),
888 23 => pack_64_23(
889 array_ref![input, 0, 1024],
890 array_mut_ref![output, 0, 1024 * 23 / 64],
891 ),
892 24 => pack_64_24(
893 array_ref![input, 0, 1024],
894 array_mut_ref![output, 0, 1024 * 24 / 64],
895 ),
896 25 => pack_64_25(
897 array_ref![input, 0, 1024],
898 array_mut_ref![output, 0, 1024 * 25 / 64],
899 ),
900 26 => pack_64_26(
901 array_ref![input, 0, 1024],
902 array_mut_ref![output, 0, 1024 * 26 / 64],
903 ),
904 27 => pack_64_27(
905 array_ref![input, 0, 1024],
906 array_mut_ref![output, 0, 1024 * 27 / 64],
907 ),
908 28 => pack_64_28(
909 array_ref![input, 0, 1024],
910 array_mut_ref![output, 0, 1024 * 28 / 64],
911 ),
912 29 => pack_64_29(
913 array_ref![input, 0, 1024],
914 array_mut_ref![output, 0, 1024 * 29 / 64],
915 ),
916
917 30 => pack_64_30(
918 array_ref![input, 0, 1024],
919 array_mut_ref![output, 0, 1024 * 30 / 64],
920 ),
921 31 => pack_64_31(
922 array_ref![input, 0, 1024],
923 array_mut_ref![output, 0, 1024 * 31 / 64],
924 ),
925 32 => pack_64_32(
926 array_ref![input, 0, 1024],
927 array_mut_ref![output, 0, 1024 * 32 / 64],
928 ),
929 33 => pack_64_33(
930 array_ref![input, 0, 1024],
931 array_mut_ref![output, 0, 1024 * 33 / 64],
932 ),
933 34 => pack_64_34(
934 array_ref![input, 0, 1024],
935 array_mut_ref![output, 0, 1024 * 34 / 64],
936 ),
937 35 => pack_64_35(
938 array_ref![input, 0, 1024],
939 array_mut_ref![output, 0, 1024 * 35 / 64],
940 ),
941 36 => pack_64_36(
942 array_ref![input, 0, 1024],
943 array_mut_ref![output, 0, 1024 * 36 / 64],
944 ),
945 37 => pack_64_37(
946 array_ref![input, 0, 1024],
947 array_mut_ref![output, 0, 1024 * 37 / 64],
948 ),
949 38 => pack_64_38(
950 array_ref![input, 0, 1024],
951 array_mut_ref![output, 0, 1024 * 38 / 64],
952 ),
953 39 => pack_64_39(
954 array_ref![input, 0, 1024],
955 array_mut_ref![output, 0, 1024 * 39 / 64],
956 ),
957
958 40 => pack_64_40(
959 array_ref![input, 0, 1024],
960 array_mut_ref![output, 0, 1024 * 40 / 64],
961 ),
962 41 => pack_64_41(
963 array_ref![input, 0, 1024],
964 array_mut_ref![output, 0, 1024 * 41 / 64],
965 ),
966 42 => pack_64_42(
967 array_ref![input, 0, 1024],
968 array_mut_ref![output, 0, 1024 * 42 / 64],
969 ),
970 43 => pack_64_43(
971 array_ref![input, 0, 1024],
972 array_mut_ref![output, 0, 1024 * 43 / 64],
973 ),
974 44 => pack_64_44(
975 array_ref![input, 0, 1024],
976 array_mut_ref![output, 0, 1024 * 44 / 64],
977 ),
978 45 => pack_64_45(
979 array_ref![input, 0, 1024],
980 array_mut_ref![output, 0, 1024 * 45 / 64],
981 ),
982 46 => pack_64_46(
983 array_ref![input, 0, 1024],
984 array_mut_ref![output, 0, 1024 * 46 / 64],
985 ),
986 47 => pack_64_47(
987 array_ref![input, 0, 1024],
988 array_mut_ref![output, 0, 1024 * 47 / 64],
989 ),
990 48 => pack_64_48(
991 array_ref![input, 0, 1024],
992 array_mut_ref![output, 0, 1024 * 48 / 64],
993 ),
994 49 => pack_64_49(
995 array_ref![input, 0, 1024],
996 array_mut_ref![output, 0, 1024 * 49 / 64],
997 ),
998
999 50 => pack_64_50(
1000 array_ref![input, 0, 1024],
1001 array_mut_ref![output, 0, 1024 * 50 / 64],
1002 ),
1003 51 => pack_64_51(
1004 array_ref![input, 0, 1024],
1005 array_mut_ref![output, 0, 1024 * 51 / 64],
1006 ),
1007 52 => pack_64_52(
1008 array_ref![input, 0, 1024],
1009 array_mut_ref![output, 0, 1024 * 52 / 64],
1010 ),
1011 53 => pack_64_53(
1012 array_ref![input, 0, 1024],
1013 array_mut_ref![output, 0, 1024 * 53 / 64],
1014 ),
1015 54 => pack_64_54(
1016 array_ref![input, 0, 1024],
1017 array_mut_ref![output, 0, 1024 * 54 / 64],
1018 ),
1019 55 => pack_64_55(
1020 array_ref![input, 0, 1024],
1021 array_mut_ref![output, 0, 1024 * 55 / 64],
1022 ),
1023 56 => pack_64_56(
1024 array_ref![input, 0, 1024],
1025 array_mut_ref![output, 0, 1024 * 56 / 64],
1026 ),
1027 57 => pack_64_57(
1028 array_ref![input, 0, 1024],
1029 array_mut_ref![output, 0, 1024 * 57 / 64],
1030 ),
1031 58 => pack_64_58(
1032 array_ref![input, 0, 1024],
1033 array_mut_ref![output, 0, 1024 * 58 / 64],
1034 ),
1035 59 => pack_64_59(
1036 array_ref![input, 0, 1024],
1037 array_mut_ref![output, 0, 1024 * 59 / 64],
1038 ),
1039
1040 60 => pack_64_60(
1041 array_ref![input, 0, 1024],
1042 array_mut_ref![output, 0, 1024 * 60 / 64],
1043 ),
1044 61 => pack_64_61(
1045 array_ref![input, 0, 1024],
1046 array_mut_ref![output, 0, 1024 * 61 / 64],
1047 ),
1048 62 => pack_64_62(
1049 array_ref![input, 0, 1024],
1050 array_mut_ref![output, 0, 1024 * 62 / 64],
1051 ),
1052 63 => pack_64_63(
1053 array_ref![input, 0, 1024],
1054 array_mut_ref![output, 0, 1024 * 63 / 64],
1055 ),
1056 64 => pack_64_64(
1057 array_ref![input, 0, 1024],
1058 array_mut_ref![output, 0, 1024 * 64 / 64],
1059 ),
1060
1061 _ => unreachable!("Unsupported width: {}", width),
1062 }
1063 }
1064
1065 unsafe fn unchecked_unpack(width: usize, input: &[Self], output: &mut [Self]) {
1066 let packed_len = 128 * width / size_of::<Self>();
1067 debug_assert_eq!(
1068 input.len(),
1069 packed_len,
1070 "Input buffer must be of size 1024 * W / T"
1071 );
1072 debug_assert_eq!(output.len(), 1024, "Output buffer must be of size 1024");
1073 debug_assert!(
1074 width <= Self::T,
1075 "Width must be less than or equal to {}",
1076 Self::T
1077 );
1078
1079 match width {
1080 1 => unpack_64_1(
1081 array_ref![input, 0, 1024 / 64],
1082 array_mut_ref![output, 0, 1024],
1083 ),
1084 2 => unpack_64_2(
1085 array_ref![input, 0, 1024 * 2 / 64],
1086 array_mut_ref![output, 0, 1024],
1087 ),
1088 3 => unpack_64_3(
1089 array_ref![input, 0, 1024 * 3 / 64],
1090 array_mut_ref![output, 0, 1024],
1091 ),
1092 4 => unpack_64_4(
1093 array_ref![input, 0, 1024 * 4 / 64],
1094 array_mut_ref![output, 0, 1024],
1095 ),
1096 5 => unpack_64_5(
1097 array_ref![input, 0, 1024 * 5 / 64],
1098 array_mut_ref![output, 0, 1024],
1099 ),
1100 6 => unpack_64_6(
1101 array_ref![input, 0, 1024 * 6 / 64],
1102 array_mut_ref![output, 0, 1024],
1103 ),
1104 7 => unpack_64_7(
1105 array_ref![input, 0, 1024 * 7 / 64],
1106 array_mut_ref![output, 0, 1024],
1107 ),
1108 8 => unpack_64_8(
1109 array_ref![input, 0, 1024 * 8 / 64],
1110 array_mut_ref![output, 0, 1024],
1111 ),
1112 9 => unpack_64_9(
1113 array_ref![input, 0, 1024 * 9 / 64],
1114 array_mut_ref![output, 0, 1024],
1115 ),
1116
1117 10 => unpack_64_10(
1118 array_ref![input, 0, 1024 * 10 / 64],
1119 array_mut_ref![output, 0, 1024],
1120 ),
1121 11 => unpack_64_11(
1122 array_ref![input, 0, 1024 * 11 / 64],
1123 array_mut_ref![output, 0, 1024],
1124 ),
1125 12 => unpack_64_12(
1126 array_ref![input, 0, 1024 * 12 / 64],
1127 array_mut_ref![output, 0, 1024],
1128 ),
1129 13 => unpack_64_13(
1130 array_ref![input, 0, 1024 * 13 / 64],
1131 array_mut_ref![output, 0, 1024],
1132 ),
1133 14 => unpack_64_14(
1134 array_ref![input, 0, 1024 * 14 / 64],
1135 array_mut_ref![output, 0, 1024],
1136 ),
1137 15 => unpack_64_15(
1138 array_ref![input, 0, 1024 * 15 / 64],
1139 array_mut_ref![output, 0, 1024],
1140 ),
1141 16 => unpack_64_16(
1142 array_ref![input, 0, 1024 * 16 / 64],
1143 array_mut_ref![output, 0, 1024],
1144 ),
1145 17 => unpack_64_17(
1146 array_ref![input, 0, 1024 * 17 / 64],
1147 array_mut_ref![output, 0, 1024],
1148 ),
1149 18 => unpack_64_18(
1150 array_ref![input, 0, 1024 * 18 / 64],
1151 array_mut_ref![output, 0, 1024],
1152 ),
1153 19 => unpack_64_19(
1154 array_ref![input, 0, 1024 * 19 / 64],
1155 array_mut_ref![output, 0, 1024],
1156 ),
1157
1158 20 => unpack_64_20(
1159 array_ref![input, 0, 1024 * 20 / 64],
1160 array_mut_ref![output, 0, 1024],
1161 ),
1162 21 => unpack_64_21(
1163 array_ref![input, 0, 1024 * 21 / 64],
1164 array_mut_ref![output, 0, 1024],
1165 ),
1166 22 => unpack_64_22(
1167 array_ref![input, 0, 1024 * 22 / 64],
1168 array_mut_ref![output, 0, 1024],
1169 ),
1170 23 => unpack_64_23(
1171 array_ref![input, 0, 1024 * 23 / 64],
1172 array_mut_ref![output, 0, 1024],
1173 ),
1174 24 => unpack_64_24(
1175 array_ref![input, 0, 1024 * 24 / 64],
1176 array_mut_ref![output, 0, 1024],
1177 ),
1178 25 => unpack_64_25(
1179 array_ref![input, 0, 1024 * 25 / 64],
1180 array_mut_ref![output, 0, 1024],
1181 ),
1182 26 => unpack_64_26(
1183 array_ref![input, 0, 1024 * 26 / 64],
1184 array_mut_ref![output, 0, 1024],
1185 ),
1186 27 => unpack_64_27(
1187 array_ref![input, 0, 1024 * 27 / 64],
1188 array_mut_ref![output, 0, 1024],
1189 ),
1190 28 => unpack_64_28(
1191 array_ref![input, 0, 1024 * 28 / 64],
1192 array_mut_ref![output, 0, 1024],
1193 ),
1194 29 => unpack_64_29(
1195 array_ref![input, 0, 1024 * 29 / 64],
1196 array_mut_ref![output, 0, 1024],
1197 ),
1198
1199 30 => unpack_64_30(
1200 array_ref![input, 0, 1024 * 30 / 64],
1201 array_mut_ref![output, 0, 1024],
1202 ),
1203 31 => unpack_64_31(
1204 array_ref![input, 0, 1024 * 31 / 64],
1205 array_mut_ref![output, 0, 1024],
1206 ),
1207 32 => unpack_64_32(
1208 array_ref![input, 0, 1024 * 32 / 64],
1209 array_mut_ref![output, 0, 1024],
1210 ),
1211 33 => unpack_64_33(
1212 array_ref![input, 0, 1024 * 33 / 64],
1213 array_mut_ref![output, 0, 1024],
1214 ),
1215 34 => unpack_64_34(
1216 array_ref![input, 0, 1024 * 34 / 64],
1217 array_mut_ref![output, 0, 1024],
1218 ),
1219 35 => unpack_64_35(
1220 array_ref![input, 0, 1024 * 35 / 64],
1221 array_mut_ref![output, 0, 1024],
1222 ),
1223 36 => unpack_64_36(
1224 array_ref![input, 0, 1024 * 36 / 64],
1225 array_mut_ref![output, 0, 1024],
1226 ),
1227 37 => unpack_64_37(
1228 array_ref![input, 0, 1024 * 37 / 64],
1229 array_mut_ref![output, 0, 1024],
1230 ),
1231 38 => unpack_64_38(
1232 array_ref![input, 0, 1024 * 38 / 64],
1233 array_mut_ref![output, 0, 1024],
1234 ),
1235 39 => unpack_64_39(
1236 array_ref![input, 0, 1024 * 39 / 64],
1237 array_mut_ref![output, 0, 1024],
1238 ),
1239
1240 40 => unpack_64_40(
1241 array_ref![input, 0, 1024 * 40 / 64],
1242 array_mut_ref![output, 0, 1024],
1243 ),
1244 41 => unpack_64_41(
1245 array_ref![input, 0, 1024 * 41 / 64],
1246 array_mut_ref![output, 0, 1024],
1247 ),
1248 42 => unpack_64_42(
1249 array_ref![input, 0, 1024 * 42 / 64],
1250 array_mut_ref![output, 0, 1024],
1251 ),
1252 43 => unpack_64_43(
1253 array_ref![input, 0, 1024 * 43 / 64],
1254 array_mut_ref![output, 0, 1024],
1255 ),
1256 44 => unpack_64_44(
1257 array_ref![input, 0, 1024 * 44 / 64],
1258 array_mut_ref![output, 0, 1024],
1259 ),
1260 45 => unpack_64_45(
1261 array_ref![input, 0, 1024 * 45 / 64],
1262 array_mut_ref![output, 0, 1024],
1263 ),
1264 46 => unpack_64_46(
1265 array_ref![input, 0, 1024 * 46 / 64],
1266 array_mut_ref![output, 0, 1024],
1267 ),
1268 47 => unpack_64_47(
1269 array_ref![input, 0, 1024 * 47 / 64],
1270 array_mut_ref![output, 0, 1024],
1271 ),
1272 48 => unpack_64_48(
1273 array_ref![input, 0, 1024 * 48 / 64],
1274 array_mut_ref![output, 0, 1024],
1275 ),
1276 49 => unpack_64_49(
1277 array_ref![input, 0, 1024 * 49 / 64],
1278 array_mut_ref![output, 0, 1024],
1279 ),
1280
1281 50 => unpack_64_50(
1282 array_ref![input, 0, 1024 * 50 / 64],
1283 array_mut_ref![output, 0, 1024],
1284 ),
1285 51 => unpack_64_51(
1286 array_ref![input, 0, 1024 * 51 / 64],
1287 array_mut_ref![output, 0, 1024],
1288 ),
1289 52 => unpack_64_52(
1290 array_ref![input, 0, 1024 * 52 / 64],
1291 array_mut_ref![output, 0, 1024],
1292 ),
1293 53 => unpack_64_53(
1294 array_ref![input, 0, 1024 * 53 / 64],
1295 array_mut_ref![output, 0, 1024],
1296 ),
1297 54 => unpack_64_54(
1298 array_ref![input, 0, 1024 * 54 / 64],
1299 array_mut_ref![output, 0, 1024],
1300 ),
1301 55 => unpack_64_55(
1302 array_ref![input, 0, 1024 * 55 / 64],
1303 array_mut_ref![output, 0, 1024],
1304 ),
1305 56 => unpack_64_56(
1306 array_ref![input, 0, 1024 * 56 / 64],
1307 array_mut_ref![output, 0, 1024],
1308 ),
1309 57 => unpack_64_57(
1310 array_ref![input, 0, 1024 * 57 / 64],
1311 array_mut_ref![output, 0, 1024],
1312 ),
1313 58 => unpack_64_58(
1314 array_ref![input, 0, 1024 * 58 / 64],
1315 array_mut_ref![output, 0, 1024],
1316 ),
1317 59 => unpack_64_59(
1318 array_ref![input, 0, 1024 * 59 / 64],
1319 array_mut_ref![output, 0, 1024],
1320 ),
1321
1322 60 => unpack_64_60(
1323 array_ref![input, 0, 1024 * 60 / 64],
1324 array_mut_ref![output, 0, 1024],
1325 ),
1326 61 => unpack_64_61(
1327 array_ref![input, 0, 1024 * 61 / 64],
1328 array_mut_ref![output, 0, 1024],
1329 ),
1330 62 => unpack_64_62(
1331 array_ref![input, 0, 1024 * 62 / 64],
1332 array_mut_ref![output, 0, 1024],
1333 ),
1334 63 => unpack_64_63(
1335 array_ref![input, 0, 1024 * 63 / 64],
1336 array_mut_ref![output, 0, 1024],
1337 ),
1338 64 => unpack_64_64(
1339 array_ref![input, 0, 1024 * 64 / 64],
1340 array_mut_ref![output, 0, 1024],
1341 ),
1342
1343 _ => unreachable!("Unsupported width: {}", width),
1344 }
1345 }
1346}
1347
// Generates `fn $func(input, output)`: a fixed-width unpacker for `u8` values.
//
// `input` holds the packed data (`1024 * $width / u8::T` bytes) and `output`
// receives 1024 values. The function walks every lane (`u8::LANES` of them) and
// hands each one to `unpack!`; the kernel given to `unpack!` simply stores the
// element it receives at the index it receives.
macro_rules! unpack_8 {
    ($func:ident, $width:expr) => {
        fn $func(input: &[u8; 1024 * $width / u8::T], output: &mut [u8; 1024]) {
            for lane in 0..u8::LANES {
                // `$pos` / `$value` are not metavariables of this macro; they are
                // forwarded verbatim so `unpack!` can bind them in its kernel.
                unpack!(u8, $width, input, lane, |$pos, $value| {
                    output[$pos] = $value;
                });
            }
        }
    };
}
1359
// One `u8` unpacking function per bit width from 1 through 8 (`unpack_8_<width>`).
unpack_8!(unpack_8_1, 1);
unpack_8!(unpack_8_2, 2);
unpack_8!(unpack_8_3, 3);
unpack_8!(unpack_8_4, 4);
unpack_8!(unpack_8_5, 5);
unpack_8!(unpack_8_6, 6);
unpack_8!(unpack_8_7, 7);
unpack_8!(unpack_8_8, 8);
1368
// Generates `fn $func(input, output)`: a fixed-width packer for `u8` values.
//
// `input` supplies 1024 values and `output` receives the packed form
// (`1024 * $width / u8::T` bytes). Each of the `u8::LANES` lanes is packed by one
// `pack!` expansion, whose kernel reads the source value at the requested index.
macro_rules! pack_8 {
    ($func:ident, $width:expr) => {
        fn $func(input: &[u8; 1024], output: &mut [u8; 1024 * $width / u8::T]) {
            for lane in 0..u8::LANES {
                // `$pos` is forwarded verbatim; `pack!` binds it in its kernel.
                pack!(u8, $width, output, lane, |$pos| { input[$pos] });
            }
        }
    };
}
// One `u8` packing function per bit width from 1 through 8 (`pack_8_<width>`).
pack_8!(pack_8_1, 1);
pack_8!(pack_8_2, 2);
pack_8!(pack_8_3, 3);
pack_8!(pack_8_4, 4);
pack_8!(pack_8_5, 5);
pack_8!(pack_8_6, 6);
pack_8!(pack_8_7, 7);
pack_8!(pack_8_8, 8);
1386
// Generates `fn $func(input, output)`: a fixed-width unpacker for `u16` values.
//
// `input` holds the packed data (`1024 * $width / u16::T` words) and `output`
// receives 1024 values. Every lane (`u16::LANES` in total) is expanded by one
// `unpack!` invocation; the kernel stores each element at the index it is given.
macro_rules! unpack_16 {
    ($func:ident, $width:expr) => {
        fn $func(input: &[u16; 1024 * $width / u16::T], output: &mut [u16; 1024]) {
            for lane in 0..u16::LANES {
                // `$pos` / `$value` are forwarded verbatim so `unpack!` can bind
                // them in its kernel.
                unpack!(u16, $width, input, lane, |$pos, $value| {
                    output[$pos] = $value;
                });
            }
        }
    };
}
1398
// One `u16` unpacking function per bit width from 1 through 16 (`unpack_16_<width>`).
unpack_16!(unpack_16_1, 1);
unpack_16!(unpack_16_2, 2);
unpack_16!(unpack_16_3, 3);
unpack_16!(unpack_16_4, 4);
unpack_16!(unpack_16_5, 5);
unpack_16!(unpack_16_6, 6);
unpack_16!(unpack_16_7, 7);
unpack_16!(unpack_16_8, 8);
unpack_16!(unpack_16_9, 9);
unpack_16!(unpack_16_10, 10);
unpack_16!(unpack_16_11, 11);
unpack_16!(unpack_16_12, 12);
unpack_16!(unpack_16_13, 13);
unpack_16!(unpack_16_14, 14);
unpack_16!(unpack_16_15, 15);
unpack_16!(unpack_16_16, 16);
1415
// Generates `fn $func(input, output)`: a fixed-width packer for `u16` values.
//
// `input` supplies 1024 values and `output` receives the packed form
// (`1024 * $width / u16::T` words). Each of the `u16::LANES` lanes is packed by
// one `pack!` expansion, whose kernel reads the source value at the requested index.
macro_rules! pack_16 {
    ($func:ident, $width:expr) => {
        fn $func(input: &[u16; 1024], output: &mut [u16; 1024 * $width / u16::T]) {
            for lane in 0..u16::LANES {
                // `$pos` is forwarded verbatim; `pack!` binds it in its kernel.
                pack!(u16, $width, output, lane, |$pos| { input[$pos] });
            }
        }
    };
}
1425
// One `u16` packing function per bit width from 1 through 16 (`pack_16_<width>`).
pack_16!(pack_16_1, 1);
pack_16!(pack_16_2, 2);
pack_16!(pack_16_3, 3);
pack_16!(pack_16_4, 4);
pack_16!(pack_16_5, 5);
pack_16!(pack_16_6, 6);
pack_16!(pack_16_7, 7);
pack_16!(pack_16_8, 8);
pack_16!(pack_16_9, 9);
pack_16!(pack_16_10, 10);
pack_16!(pack_16_11, 11);
pack_16!(pack_16_12, 12);
pack_16!(pack_16_13, 13);
pack_16!(pack_16_14, 14);
pack_16!(pack_16_15, 15);
pack_16!(pack_16_16, 16);
1442
// Generates `fn $func(input, output)`: a fixed-width unpacker for `u32` values.
//
// `input` holds the packed data (`1024 * $width / u32::T` words) and `output`
// receives 1024 values. Every lane (`u32::LANES` in total) is expanded by one
// `unpack!` invocation; the kernel stores each element at the index it is given.
macro_rules! unpack_32 {
    ($func:ident, $width:expr) => {
        fn $func(input: &[u32; 1024 * $width / u32::T], output: &mut [u32; 1024]) {
            for lane in 0..u32::LANES {
                // `$pos` / `$value` are forwarded verbatim so `unpack!` can bind
                // them in its kernel.
                unpack!(u32, $width, input, lane, |$pos, $value| {
                    output[$pos] = $value;
                });
            }
        }
    };
}
1454
// One `u32` unpacking function per bit width from 1 through 32 (`unpack_32_<width>`).
unpack_32!(unpack_32_1, 1);
unpack_32!(unpack_32_2, 2);
unpack_32!(unpack_32_3, 3);
unpack_32!(unpack_32_4, 4);
unpack_32!(unpack_32_5, 5);
unpack_32!(unpack_32_6, 6);
unpack_32!(unpack_32_7, 7);
unpack_32!(unpack_32_8, 8);
unpack_32!(unpack_32_9, 9);
unpack_32!(unpack_32_10, 10);
unpack_32!(unpack_32_11, 11);
unpack_32!(unpack_32_12, 12);
unpack_32!(unpack_32_13, 13);
unpack_32!(unpack_32_14, 14);
unpack_32!(unpack_32_15, 15);
unpack_32!(unpack_32_16, 16);
unpack_32!(unpack_32_17, 17);
unpack_32!(unpack_32_18, 18);
unpack_32!(unpack_32_19, 19);
unpack_32!(unpack_32_20, 20);
unpack_32!(unpack_32_21, 21);
unpack_32!(unpack_32_22, 22);
unpack_32!(unpack_32_23, 23);
unpack_32!(unpack_32_24, 24);
unpack_32!(unpack_32_25, 25);
unpack_32!(unpack_32_26, 26);
unpack_32!(unpack_32_27, 27);
unpack_32!(unpack_32_28, 28);
unpack_32!(unpack_32_29, 29);
unpack_32!(unpack_32_30, 30);
unpack_32!(unpack_32_31, 31);
unpack_32!(unpack_32_32, 32);
1487
// Generates `fn $name(input, output)`: a fixed-width packer for `u32` values.
//
// `input` supplies 1024 values and `output` receives the packed form
// (`1024 * $bits / u32::T` words). Each of the `u32::LANES` lanes is packed by
// one `pack!` expansion, whose kernel reads the source value at the requested index.
//
// The output length is expressed with the `FastLanes` constant `u32::T`, the same
// way the other generators in this file size their arrays, so no integer cast
// is needed.
macro_rules! pack_32 {
    ($name:ident, $bits:expr) => {
        fn $name(input: &[u32; 1024], output: &mut [u32; 1024 * $bits / u32::T]) {
            for lane in 0..u32::LANES {
                // `$idx` is forwarded verbatim; `pack!` binds it in its kernel.
                pack!(u32, $bits, output, lane, |$idx| { input[$idx] });
            }
        }
    };
}
1497
// One `u32` packing function per bit width from 1 through 32 (`pack_32_<width>`).
pack_32!(pack_32_1, 1);
pack_32!(pack_32_2, 2);
pack_32!(pack_32_3, 3);
pack_32!(pack_32_4, 4);
pack_32!(pack_32_5, 5);
pack_32!(pack_32_6, 6);
pack_32!(pack_32_7, 7);
pack_32!(pack_32_8, 8);
pack_32!(pack_32_9, 9);
pack_32!(pack_32_10, 10);
pack_32!(pack_32_11, 11);
pack_32!(pack_32_12, 12);
pack_32!(pack_32_13, 13);
pack_32!(pack_32_14, 14);
pack_32!(pack_32_15, 15);
pack_32!(pack_32_16, 16);
pack_32!(pack_32_17, 17);
pack_32!(pack_32_18, 18);
pack_32!(pack_32_19, 19);
pack_32!(pack_32_20, 20);
pack_32!(pack_32_21, 21);
pack_32!(pack_32_22, 22);
pack_32!(pack_32_23, 23);
pack_32!(pack_32_24, 24);
pack_32!(pack_32_25, 25);
pack_32!(pack_32_26, 26);
pack_32!(pack_32_27, 27);
pack_32!(pack_32_28, 28);
pack_32!(pack_32_29, 29);
pack_32!(pack_32_30, 30);
pack_32!(pack_32_31, 31);
pack_32!(pack_32_32, 32);
1530
// Generates `fn $func(input, output)`: a fixed-width unpacker for `u64` values.
//
// `input` holds the packed data (`1024 * $width / u64::T` words) and `output`
// receives 1024 values. Every lane (`u64::LANES` in total) is expanded by one
// `unpack!` invocation; the kernel stores each element at the index it is given.
macro_rules! unpack_64 {
    ($func:ident, $width:expr) => {
        fn $func(input: &[u64; 1024 * $width / u64::T], output: &mut [u64; 1024]) {
            for lane in 0..u64::LANES {
                // `$pos` / `$value` are forwarded verbatim so `unpack!` can bind
                // them in its kernel.
                unpack!(u64, $width, input, lane, |$pos, $value| {
                    output[$pos] = $value;
                });
            }
        }
    };
}
1542
// One `u64` unpacking function per bit width from 1 through 64 (`unpack_64_<width>`).
unpack_64!(unpack_64_1, 1);
unpack_64!(unpack_64_2, 2);
unpack_64!(unpack_64_3, 3);
unpack_64!(unpack_64_4, 4);
unpack_64!(unpack_64_5, 5);
unpack_64!(unpack_64_6, 6);
unpack_64!(unpack_64_7, 7);
unpack_64!(unpack_64_8, 8);
unpack_64!(unpack_64_9, 9);
unpack_64!(unpack_64_10, 10);
unpack_64!(unpack_64_11, 11);
unpack_64!(unpack_64_12, 12);
unpack_64!(unpack_64_13, 13);
unpack_64!(unpack_64_14, 14);
unpack_64!(unpack_64_15, 15);
unpack_64!(unpack_64_16, 16);
unpack_64!(unpack_64_17, 17);
unpack_64!(unpack_64_18, 18);
unpack_64!(unpack_64_19, 19);
unpack_64!(unpack_64_20, 20);
unpack_64!(unpack_64_21, 21);
unpack_64!(unpack_64_22, 22);
unpack_64!(unpack_64_23, 23);
unpack_64!(unpack_64_24, 24);
unpack_64!(unpack_64_25, 25);
unpack_64!(unpack_64_26, 26);
unpack_64!(unpack_64_27, 27);
unpack_64!(unpack_64_28, 28);
unpack_64!(unpack_64_29, 29);
unpack_64!(unpack_64_30, 30);
unpack_64!(unpack_64_31, 31);
unpack_64!(unpack_64_32, 32);

// Widths above 32 bits.
unpack_64!(unpack_64_33, 33);
unpack_64!(unpack_64_34, 34);
unpack_64!(unpack_64_35, 35);
unpack_64!(unpack_64_36, 36);
unpack_64!(unpack_64_37, 37);
unpack_64!(unpack_64_38, 38);
unpack_64!(unpack_64_39, 39);
unpack_64!(unpack_64_40, 40);
unpack_64!(unpack_64_41, 41);
unpack_64!(unpack_64_42, 42);
unpack_64!(unpack_64_43, 43);
unpack_64!(unpack_64_44, 44);
unpack_64!(unpack_64_45, 45);
unpack_64!(unpack_64_46, 46);
unpack_64!(unpack_64_47, 47);
unpack_64!(unpack_64_48, 48);
unpack_64!(unpack_64_49, 49);
unpack_64!(unpack_64_50, 50);
unpack_64!(unpack_64_51, 51);
unpack_64!(unpack_64_52, 52);
unpack_64!(unpack_64_53, 53);
unpack_64!(unpack_64_54, 54);
unpack_64!(unpack_64_55, 55);
unpack_64!(unpack_64_56, 56);
unpack_64!(unpack_64_57, 57);
unpack_64!(unpack_64_58, 58);
unpack_64!(unpack_64_59, 59);
unpack_64!(unpack_64_60, 60);
unpack_64!(unpack_64_61, 61);
unpack_64!(unpack_64_62, 62);
unpack_64!(unpack_64_63, 63);
unpack_64!(unpack_64_64, 64);
1608
// Generates `fn $name(input, output)`: a fixed-width packer for `u64` values.
//
// `input` supplies 1024 values and `output` receives the packed form
// (`1024 * $bits / u64::T` words). Each of the `u64::LANES` lanes is packed by
// one `pack!` expansion, whose kernel reads the source value at the requested index.
//
// The output length is expressed with the `FastLanes` constant `u64::T`, the same
// way the other generators in this file size their arrays, so no integer cast
// is needed.
macro_rules! pack_64 {
    ($name:ident, $bits:expr) => {
        fn $name(input: &[u64; 1024], output: &mut [u64; 1024 * $bits / u64::T]) {
            for lane in 0..u64::LANES {
                // `$idx` is forwarded verbatim; `pack!` binds it in its kernel.
                pack!(u64, $bits, output, lane, |$idx| { input[$idx] });
            }
        }
    };
}
1618
// One `u64` packing function per bit width from 1 through 64 (`pack_64_<width>`).
pack_64!(pack_64_1, 1);
pack_64!(pack_64_2, 2);
pack_64!(pack_64_3, 3);
pack_64!(pack_64_4, 4);
pack_64!(pack_64_5, 5);
pack_64!(pack_64_6, 6);
pack_64!(pack_64_7, 7);
pack_64!(pack_64_8, 8);
pack_64!(pack_64_9, 9);
pack_64!(pack_64_10, 10);
pack_64!(pack_64_11, 11);
pack_64!(pack_64_12, 12);
pack_64!(pack_64_13, 13);
pack_64!(pack_64_14, 14);
pack_64!(pack_64_15, 15);
pack_64!(pack_64_16, 16);
pack_64!(pack_64_17, 17);
pack_64!(pack_64_18, 18);
pack_64!(pack_64_19, 19);
pack_64!(pack_64_20, 20);
pack_64!(pack_64_21, 21);
pack_64!(pack_64_22, 22);
pack_64!(pack_64_23, 23);
pack_64!(pack_64_24, 24);
pack_64!(pack_64_25, 25);
pack_64!(pack_64_26, 26);
pack_64!(pack_64_27, 27);
pack_64!(pack_64_28, 28);
pack_64!(pack_64_29, 29);
pack_64!(pack_64_30, 30);
pack_64!(pack_64_31, 31);
pack_64!(pack_64_32, 32);

// Widths above 32 bits.
pack_64!(pack_64_33, 33);
pack_64!(pack_64_34, 34);
pack_64!(pack_64_35, 35);
pack_64!(pack_64_36, 36);
pack_64!(pack_64_37, 37);
pack_64!(pack_64_38, 38);
pack_64!(pack_64_39, 39);
pack_64!(pack_64_40, 40);
pack_64!(pack_64_41, 41);
pack_64!(pack_64_42, 42);
pack_64!(pack_64_43, 43);
pack_64!(pack_64_44, 44);
pack_64!(pack_64_45, 45);
pack_64!(pack_64_46, 46);
pack_64!(pack_64_47, 47);
pack_64!(pack_64_48, 48);
pack_64!(pack_64_49, 49);
pack_64!(pack_64_50, 50);
pack_64!(pack_64_51, 51);
pack_64!(pack_64_52, 52);
pack_64!(pack_64_53, 53);
pack_64!(pack_64_54, 54);
pack_64!(pack_64_55, 55);
pack_64!(pack_64_56, 56);
pack_64!(pack_64_57, 57);
pack_64!(pack_64_58, 58);
pack_64!(pack_64_59, 59);
pack_64!(pack_64_60, 60);
pack_64!(pack_64_61, 61);
pack_64!(pack_64_62, 62);
pack_64!(pack_64_63, 63);
pack_64!(pack_64_64, 64);
1684
#[cfg(test)]
mod test {
    use super::*;
    use core::array;

    /// Small deterministic pseudo-random generator used to fill the test inputs,
    /// so the tests are reproducible without an external crate.
    pub struct XorShift {
        state: u64,
    }

    impl XorShift {
        /// Creates a generator whose first step starts from `seed`.
        pub fn new(seed: u64) -> Self {
            Self { state: seed }
        }

        /// Scrambles the state with three shift-and-XOR steps (left 13, right 7,
        /// left 17), stores the result as the new state and returns it.
        pub fn next(&mut self) -> u64 {
            let mut x = self.state;
            x ^= x << 13;
            x ^= x >> 7;
            x ^= x << 17;
            self.state = x;
            x
        }
    }

    /// Round-trips 1024 pseudo-random `u8` values that fit in `bit_width` bits
    /// through the `pack!` and `unpack!` macros and checks they come back unchanged.
    fn pack_unpack_u8(bit_width: usize) {
        let mut values: [u8; 1024] = [0; 1024];
        let mut rng = XorShift::new(123456789);
        for value in &mut values {
            // The modulus is taken on the u64 sample, so `1 << bit_width` cannot
            // overflow even when `bit_width` equals the full width of `u8`.
            *value = (rng.next() % (1 << bit_width)) as u8;
        }

        let mut packed = vec![0; 1024 * bit_width / 8];
        for lane in 0..u8::LANES {
            pack!(u8, bit_width, packed, lane, |$pos| {
                values[$pos]
            });
        }

        let mut unpacked: [u8; 1024] = [0; 1024];
        for lane in 0..u8::LANES {
            unpack!(u8, bit_width, packed, lane, |$idx, $elem| {
                unpacked[$idx] = $elem;
            });
        }

        assert_eq!(values, unpacked, "u8 round trip failed for bit_width {}", bit_width);
    }

    /// Round-trips 1024 pseudo-random `u16` values that fit in `bit_width` bits
    /// through the `pack!` and `unpack!` macros and checks they come back unchanged.
    fn pack_unpack_u16(bit_width: usize) {
        let mut values: [u16; 1024] = [0; 1024];
        let mut rng = XorShift::new(123456789);
        for value in &mut values {
            *value = (rng.next() % (1 << bit_width)) as u16;
        }

        let mut packed = vec![0; 1024 * bit_width / 16];
        for lane in 0..u16::LANES {
            pack!(u16, bit_width, packed, lane, |$pos| {
                values[$pos]
            });
        }

        let mut unpacked: [u16; 1024] = [0; 1024];
        for lane in 0..u16::LANES {
            unpack!(u16, bit_width, packed, lane, |$idx, $elem| {
                unpacked[$idx] = $elem;
            });
        }

        assert_eq!(values, unpacked, "u16 round trip failed for bit_width {}", bit_width);
    }

    /// Round-trips 1024 pseudo-random `u32` values that fit in `bit_width` bits
    /// through the `pack!` and `unpack!` macros and checks they come back unchanged.
    fn pack_unpack_u32(bit_width: usize) {
        let mut values: [u32; 1024] = [0; 1024];
        let mut rng = XorShift::new(123456789);
        for value in &mut values {
            *value = (rng.next() % (1 << bit_width)) as u32;
        }

        let mut packed = vec![0; 1024 * bit_width / 32];
        for lane in 0..u32::LANES {
            pack!(u32, bit_width, packed, lane, |$pos| {
                values[$pos]
            });
        }

        let mut unpacked: [u32; 1024] = [0; 1024];
        for lane in 0..u32::LANES {
            unpack!(u32, bit_width, packed, lane, |$idx, $elem| {
                unpacked[$idx] = $elem;
            });
        }

        assert_eq!(values, unpacked, "u32 round trip failed for bit_width {}", bit_width);
    }

    /// Round-trips 1024 pseudo-random `u64` values that fit in `bit_width` bits
    /// through the `pack!` and `unpack!` macros and checks they come back unchanged.
    fn pack_unpack_u64(bit_width: usize) {
        let mut values: [u64; 1024] = [0; 1024];
        let mut rng = XorShift::new(123456789);
        if bit_width == 64 {
            // `1 << 64` would overflow a u64, so the full-width case uses the raw sample.
            for value in &mut values {
                *value = rng.next();
            }
        } else {
            for value in &mut values {
                *value = rng.next() % (1 << bit_width);
            }
        }

        let mut packed = vec![0; 1024 * bit_width / 64];
        for lane in 0..u64::LANES {
            pack!(u64, bit_width, packed, lane, |$pos| {
                values[$pos]
            });
        }

        let mut unpacked: [u64; 1024] = [0; 1024];
        for lane in 0..u64::LANES {
            unpack!(u64, bit_width, packed, lane, |$idx, $elem| {
                unpacked[$idx] = $elem;
            });
        }

        assert_eq!(values, unpacked, "u64 round trip failed for bit_width {}", bit_width);
    }

    /// Exercises the `pack!` / `unpack!` macros for every width from 0 up to the
    /// full width of each supported integer type.
    #[test]
    fn test_pack() {
        for bit_width in 0..=u8::T {
            pack_unpack_u8(bit_width);
        }
        for bit_width in 0..=u16::T {
            pack_unpack_u16(bit_width);
        }
        for bit_width in 0..=u32::T {
            pack_unpack_u32(bit_width);
        }
        for bit_width in 0..=u64::T {
            pack_unpack_u64(bit_width);
        }
    }

    /// Round-trips pseudo-random `u8` values through `BitPacking::unchecked_pack`
    /// and `BitPacking::unchecked_unpack` for the given width.
    fn unchecked_pack_unpack_u8(bit_width: usize) {
        let mut values = [0u8; 1024];
        let mut rng = XorShift::new(123456789);
        for value in &mut values {
            *value = (rng.next() % (1 << bit_width)) as u8;
        }
        let mut packed = vec![0; 1024 * bit_width / u8::T];
        // SAFETY: presumably the callee requires `packed` to hold
        // `1024 * bit_width / T` elements, which is how it was sized above.
        // TODO confirm against the `BitPacking` safety contract.
        unsafe {
            BitPacking::unchecked_pack(bit_width, &values, &mut packed);
        }
        let mut output = [0; 1024];
        // SAFETY: same assumption as above; `output` holds 1024 elements.
        unsafe { BitPacking::unchecked_unpack(bit_width, &packed, &mut output) };
        assert_eq!(values, output, "u8 unchecked round trip failed for bit_width {}", bit_width);
    }

    /// Round-trips pseudo-random `u16` values through `BitPacking::unchecked_pack`
    /// and `BitPacking::unchecked_unpack` for the given width.
    fn unchecked_pack_unpack_u16(bit_width: usize) {
        let mut values = [0u16; 1024];
        let mut rng = XorShift::new(123456789);
        for value in &mut values {
            *value = (rng.next() % (1 << bit_width)) as u16;
        }
        let mut packed = vec![0; 1024 * bit_width / u16::T];
        // SAFETY: presumably the callee requires `packed` to hold
        // `1024 * bit_width / T` elements, which is how it was sized above.
        // TODO confirm against the `BitPacking` safety contract.
        unsafe {
            BitPacking::unchecked_pack(bit_width, &values, &mut packed);
        }
        let mut output = [0; 1024];
        // SAFETY: same assumption as above; `output` holds 1024 elements.
        unsafe { BitPacking::unchecked_unpack(bit_width, &packed, &mut output) };
        assert_eq!(values, output, "u16 unchecked round trip failed for bit_width {}", bit_width);
    }

    /// Round-trips pseudo-random `u32` values through `BitPacking::unchecked_pack`
    /// and `BitPacking::unchecked_unpack` for the given width.
    fn unchecked_pack_unpack_u32(bit_width: usize) {
        let mut values = [0u32; 1024];
        let mut rng = XorShift::new(123456789);
        for value in &mut values {
            *value = (rng.next() % (1 << bit_width)) as u32;
        }
        let mut packed = vec![0; 1024 * bit_width / u32::T];
        // SAFETY: presumably the callee requires `packed` to hold
        // `1024 * bit_width / T` elements, which is how it was sized above.
        // TODO confirm against the `BitPacking` safety contract.
        unsafe {
            BitPacking::unchecked_pack(bit_width, &values, &mut packed);
        }
        let mut output = [0; 1024];
        // SAFETY: same assumption as above; `output` holds 1024 elements.
        unsafe { BitPacking::unchecked_unpack(bit_width, &packed, &mut output) };
        assert_eq!(values, output, "u32 unchecked round trip failed for bit_width {}", bit_width);
    }

    /// Round-trips pseudo-random `u64` values through `BitPacking::unchecked_pack`
    /// and `BitPacking::unchecked_unpack` for the given width.
    fn unchecked_pack_unpack_u64(bit_width: usize) {
        let mut values = [0u64; 1024];
        let mut rng = XorShift::new(123456789);
        if bit_width == 64 {
            // `1 << 64` would overflow a u64, so the full-width case uses the raw sample.
            for value in &mut values {
                *value = rng.next();
            }
        } else {
            // Narrower widths need non-trivial input as well; leaving `values`
            // zeroed would make the round trip pass without checking anything.
            for value in &mut values {
                *value = rng.next() % (1 << bit_width);
            }
        }
        let mut packed = vec![0; 1024 * bit_width / u64::T];
        // SAFETY: presumably the callee requires `packed` to hold
        // `1024 * bit_width / T` elements, which is how it was sized above.
        // TODO confirm against the `BitPacking` safety contract.
        unsafe {
            BitPacking::unchecked_pack(bit_width, &values, &mut packed);
        }
        let mut output = [0; 1024];
        // SAFETY: same assumption as above; `output` holds 1024 elements.
        unsafe { BitPacking::unchecked_unpack(bit_width, &packed, &mut output) };
        assert_eq!(values, output, "u64 unchecked round trip failed for bit_width {}", bit_width);
    }

    /// Exercises the `BitPacking` unchecked entry points: first a fixed 10-bit
    /// `u32` example with sequential input, then every width from 1 up to the
    /// full width of each supported integer type.
    #[test]
    fn test_unchecked_pack() {
        let input = array::from_fn(|i| i as u32);
        // 1024 values * 10 bits / 32 bits per word = 320 packed words.
        let mut packed = [0; 320];
        // SAFETY: presumably sound because `packed` has exactly the length a
        // 10-bit packing of 1024 u32 values needs. TODO confirm against the
        // `BitPacking` safety contract.
        unsafe { BitPacking::unchecked_pack(10, &input, &mut packed) };
        let mut output = [0; 1024];
        // SAFETY: same assumption as above; `output` holds 1024 elements.
        unsafe { BitPacking::unchecked_unpack(10, &packed, &mut output) };
        assert_eq!(input, output);

        for bit_width in 1..=u8::T {
            unchecked_pack_unpack_u8(bit_width);
        }
        for bit_width in 1..=u16::T {
            unchecked_pack_unpack_u16(bit_width);
        }
        for bit_width in 1..=u32::T {
            unchecked_pack_unpack_u32(bit_width);
        }
        for bit_width in 1..=u64::T {
            unchecked_pack_unpack_u64(bit_width);
        }
    }
}