1use std::hint::unreachable_unchecked;
3use std::ops::Deref;
4
5use polars_error::{polars_bail, polars_err, PolarsError, PolarsResult};
6
7use crate::array::Splitable;
8use crate::buffer::Buffer;
9pub use crate::types::Offset;
10
11#[derive(Debug, Clone, PartialEq, Eq)]
16pub struct Offsets<O: Offset>(Vec<O>);
17
18impl<O: Offset> Default for Offsets<O> {
19 #[inline]
20 fn default() -> Self {
21 Self::new()
22 }
23}
24
25impl<O: Offset> Deref for Offsets<O> {
26 type Target = [O];
27
28 fn deref(&self) -> &Self::Target {
29 self.as_slice()
30 }
31}
32
33impl<O: Offset> TryFrom<Vec<O>> for Offsets<O> {
34 type Error = PolarsError;
35
36 #[inline]
37 fn try_from(offsets: Vec<O>) -> Result<Self, Self::Error> {
38 try_check_offsets(&offsets)?;
39 Ok(Self(offsets))
40 }
41}
42
43impl<O: Offset> TryFrom<Buffer<O>> for OffsetsBuffer<O> {
44 type Error = PolarsError;
45
46 #[inline]
47 fn try_from(offsets: Buffer<O>) -> Result<Self, Self::Error> {
48 try_check_offsets(&offsets)?;
49 Ok(Self(offsets))
50 }
51}
52
53impl<O: Offset> TryFrom<Vec<O>> for OffsetsBuffer<O> {
54 type Error = PolarsError;
55
56 #[inline]
57 fn try_from(offsets: Vec<O>) -> Result<Self, Self::Error> {
58 try_check_offsets(&offsets)?;
59 Ok(Self(offsets.into()))
60 }
61}
62
63impl<O: Offset> From<Offsets<O>> for OffsetsBuffer<O> {
64 #[inline]
65 fn from(offsets: Offsets<O>) -> Self {
66 Self(offsets.0.into())
67 }
68}
69
70impl<O: Offset> Offsets<O> {
71 #[inline]
73 pub fn new() -> Self {
74 Self(vec![O::zero()])
75 }
76
77 #[inline]
79 pub fn new_zeroed(length: usize) -> Self {
80 Self(vec![O::zero(); length + 1])
81 }
82
83 #[inline]
85 pub fn try_from_iter<I: IntoIterator<Item = usize>>(iter: I) -> PolarsResult<Self> {
86 let iterator = iter.into_iter();
87 let (lower, _) = iterator.size_hint();
88 let mut offsets = Self::with_capacity(lower);
89 for item in iterator {
90 offsets.try_push(item)?
91 }
92 Ok(offsets)
93 }
94
95 pub fn with_capacity(capacity: usize) -> Self {
97 let mut offsets = Vec::with_capacity(capacity + 1);
98 offsets.push(O::zero());
99 Self(offsets)
100 }
101
102 pub fn capacity(&self) -> usize {
104 self.0.capacity() - 1
105 }
106
107 pub fn reserve(&mut self, additional: usize) {
109 self.0.reserve(additional);
110 }
111
112 pub fn shrink_to_fit(&mut self) {
114 self.0.shrink_to_fit();
115 }
116
117 #[inline]
124 pub fn try_push(&mut self, length: usize) -> PolarsResult<()> {
125 if O::IS_LARGE {
126 let length = O::from_as_usize(length);
127 let old_length = self.last();
128 let new_length = *old_length + length;
129 self.0.push(new_length);
130 Ok(())
131 } else {
132 let length =
133 O::from_usize(length).ok_or_else(|| polars_err!(ComputeError: "overflow"))?;
134
135 let old_length = self.last();
136 let new_length = old_length
137 .checked_add(&length)
138 .ok_or_else(|| polars_err!(ComputeError: "overflow"))?;
139 self.0.push(new_length);
140 Ok(())
141 }
142 }
143
144 #[inline]
149 pub unsafe fn new_unchecked(offsets: Vec<O>) -> Self {
150 #[cfg(debug_assertions)]
151 {
152 let mut prev_offset = O::default();
153 let mut is_monotonely_increasing = true;
154 for offset in &offsets {
155 is_monotonely_increasing &= *offset >= prev_offset;
156 prev_offset = *offset;
157 }
158 assert!(
159 is_monotonely_increasing,
160 "Unsafe precondition violated. Invariant of offsets broken."
161 );
162 }
163
164 Self(offsets)
165 }
166
167 #[inline]
169 pub fn last(&self) -> &O {
170 match self.0.last() {
171 Some(element) => element,
172 None => unsafe { unreachable_unchecked() },
173 }
174 }
175
176 #[inline]
180 pub fn length_at(&self, index: usize) -> usize {
181 let (start, end) = self.start_end(index);
182 end - start
183 }
184
185 #[inline]
189 pub fn start_end(&self, index: usize) -> (usize, usize) {
190 assert!(index < self.len_proxy());
192 unsafe { self.start_end_unchecked(index) }
193 }
194
195 #[inline]
200 pub unsafe fn start_end_unchecked(&self, index: usize) -> (usize, usize) {
201 let start = self.0.get_unchecked(index).to_usize();
203 let end = self.0.get_unchecked(index + 1).to_usize();
204 (start, end)
205 }
206
207 #[inline]
209 pub fn len_proxy(&self) -> usize {
210 self.0.len() - 1
211 }
212
213 #[inline]
214 pub fn len(&self) -> usize {
216 self.0.len()
217 }
218
219 #[inline]
221 pub fn as_slice(&self) -> &[O] {
222 self.0.as_slice()
223 }
224
225 #[inline]
227 pub fn pop(&mut self) -> Option<O> {
228 if self.len_proxy() == 0 {
229 None
230 } else {
231 self.0.pop()
232 }
233 }
234
235 #[inline]
238 pub fn extend_constant(&mut self, additional: usize) {
239 let offset = *self.last();
240 if additional == 1 {
241 self.0.push(offset)
242 } else {
243 self.0.resize(self.len() + additional, offset)
244 }
245 }
246
247 #[inline]
251 pub fn try_from_lengths<I: Iterator<Item = usize>>(lengths: I) -> PolarsResult<Self> {
252 let mut self_ = Self::with_capacity(lengths.size_hint().0);
253 self_.try_extend_from_lengths(lengths)?;
254 Ok(self_)
255 }
256
257 #[inline]
261 pub fn try_extend_from_lengths<I: Iterator<Item = usize>>(
262 &mut self,
263 lengths: I,
264 ) -> PolarsResult<()> {
265 let mut total_length = 0;
266 let mut offset = *self.last();
267 let original_offset = offset.to_usize();
268
269 let lengths = lengths.map(|length| {
270 total_length += length;
271 O::from_as_usize(length)
272 });
273
274 let offsets = lengths.map(|length| {
275 offset += length; offset
277 });
278 self.0.extend(offsets);
279
280 let last_offset = original_offset
281 .checked_add(total_length)
282 .ok_or_else(|| polars_err!(ComputeError: "overflow"))?;
283 O::from_usize(last_offset).ok_or_else(|| polars_err!(ComputeError: "overflow"))?;
284 Ok(())
285 }
286
287 pub fn try_extend_from_self(&mut self, other: &Self) -> PolarsResult<()> {
291 let mut length = *self.last();
292 let other_length = *other.last();
293 length
295 .checked_add(&other_length)
296 .ok_or_else(|| polars_err!(ComputeError: "overflow"))?;
297
298 let lengths = other.as_slice().windows(2).map(|w| w[1] - w[0]);
299 let offsets = lengths.map(|new_length| {
300 length += new_length;
301 length
302 });
303 self.0.extend(offsets);
304 Ok(())
305 }
306
307 pub fn try_extend_from_slice(
311 &mut self,
312 other: &OffsetsBuffer<O>,
313 start: usize,
314 length: usize,
315 ) -> PolarsResult<()> {
316 if length == 0 {
317 return Ok(());
318 }
319 let other = &other.0[start..start + length + 1];
320 let other_length = other.last().expect("Length to be non-zero");
321 let mut length = *self.last();
322 length
324 .checked_add(other_length)
325 .ok_or_else(|| polars_err!(ComputeError: "overflow"))?;
326
327 let lengths = other.windows(2).map(|w| w[1] - w[0]);
328 let offsets = lengths.map(|new_length| {
329 length += new_length;
330 length
331 });
332 self.0.extend(offsets);
333 Ok(())
334 }
335
336 #[inline]
338 pub fn into_inner(self) -> Vec<O> {
339 self.0
340 }
341}
342
343fn try_check_offsets<O: Offset>(offsets: &[O]) -> PolarsResult<()> {
345 match offsets.first() {
347 None => polars_bail!(ComputeError: "offsets must have at least one element"),
348 Some(first) => {
349 if *first < O::zero() {
350 polars_bail!(ComputeError: "offsets must be larger than 0")
351 }
352 let mut previous = *first;
353 let mut any_invalid = false;
354
355 for offset in offsets {
358 if previous > *offset {
359 any_invalid = true
360 }
361 previous = *offset;
362 }
363
364 if any_invalid {
365 polars_bail!(ComputeError: "offsets must be monotonically increasing")
366 } else {
367 Ok(())
368 }
369 },
370 }
371}
372
373#[derive(Clone, PartialEq, Debug)]
378pub struct OffsetsBuffer<O: Offset>(Buffer<O>);
379
380impl<O: Offset> Default for OffsetsBuffer<O> {
381 #[inline]
382 fn default() -> Self {
383 Self(vec![O::zero()].into())
384 }
385}
386
387impl<O: Offset> OffsetsBuffer<O> {
388 #[inline]
391 pub unsafe fn new_unchecked(offsets: Buffer<O>) -> Self {
392 Self(offsets)
393 }
394
395 #[inline]
397 pub fn new() -> Self {
398 Self(vec![O::zero()].into())
399 }
400
401 #[inline]
402 pub fn one_with_length(length: O) -> Self {
403 Self(vec![O::zero(), length].into())
404 }
405
406 #[inline]
408 pub fn into_mut(self) -> either::Either<Self, Offsets<O>> {
409 self.0
410 .into_mut()
411 .map_right(|offsets| unsafe { Offsets::new_unchecked(offsets) })
413 .map_left(Self)
414 }
415
416 #[inline]
418 pub fn buffer(&self) -> &Buffer<O> {
419 &self.0
420 }
421
422 #[inline]
424 pub fn len_proxy(&self) -> usize {
425 self.0.len() - 1
426 }
427
428 #[inline]
430 pub fn len(&self) -> usize {
431 self.0.len()
432 }
433
434 #[inline]
436 pub fn as_slice(&self) -> &[O] {
437 self.0.as_slice()
438 }
439
440 #[inline]
442 pub fn range(&self) -> O {
443 *self.last() - *self.first()
444 }
445
446 #[inline]
448 pub fn first(&self) -> &O {
449 match self.0.first() {
450 Some(element) => element,
451 None => unsafe { unreachable_unchecked() },
452 }
453 }
454
455 #[inline]
457 pub fn last(&self) -> &O {
458 match self.0.last() {
459 Some(element) => element,
460 None => unsafe { unreachable_unchecked() },
461 }
462 }
463
464 #[inline]
468 pub fn length_at(&self, index: usize) -> usize {
469 let (start, end) = self.start_end(index);
470 end - start
471 }
472
473 #[inline]
477 pub fn start_end(&self, index: usize) -> (usize, usize) {
478 assert!(index < self.len_proxy());
480 unsafe { self.start_end_unchecked(index) }
481 }
482
483 #[inline]
488 pub unsafe fn start_end_unchecked(&self, index: usize) -> (usize, usize) {
489 let start = self.0.get_unchecked(index).to_usize();
491 let end = self.0.get_unchecked(index + 1).to_usize();
492 (start, end)
493 }
494
495 #[inline]
500 pub fn slice(&mut self, offset: usize, length: usize) {
501 assert!(length > 0);
502 self.0.slice(offset, length);
503 }
504
505 #[inline]
510 pub unsafe fn slice_unchecked(&mut self, offset: usize, length: usize) {
511 self.0.slice_unchecked(offset, length);
512 }
513
514 #[inline]
516 pub fn lengths(&self) -> impl ExactSizeIterator<Item = usize> + '_ {
517 self.0.windows(2).map(|w| (w[1] - w[0]).to_usize())
518 }
519
520 #[inline]
522 pub fn offset_and_length_iter(&self) -> impl ExactSizeIterator<Item = (usize, usize)> + '_ {
523 self.windows(2).map(|x| {
524 let [l, r] = x else { unreachable!() };
525 let l = l.to_usize();
526 let r = r.to_usize();
527 (l, r - l)
528 })
529 }
530
531 pub fn leaf_ranges_iter(
534 offsets: &[Self],
535 ) -> impl Iterator<Item = core::ops::Range<usize>> + '_ {
536 let others = &offsets[1..];
537
538 offsets[0].windows(2).map(move |x| {
539 let [l, r] = x else { unreachable!() };
540 let mut l = l.to_usize();
541 let mut r = r.to_usize();
542
543 for o in others {
544 let slc = o.as_slice();
545 l = slc[l].to_usize();
546 r = slc[r].to_usize();
547 }
548
549 l..r
550 })
551 }
552
553 pub fn leaf_full_start_end(offsets: &[Self]) -> core::ops::Range<usize> {
555 let mut l = offsets[0].first().to_usize();
556 let mut r = offsets[0].last().to_usize();
557
558 for o in &offsets[1..] {
559 let slc = o.as_slice();
560 l = slc[l].to_usize();
561 r = slc[r].to_usize();
562 }
563
564 l..r
565 }
566
567 #[inline]
569 pub fn into_inner(self) -> Buffer<O> {
570 self.0
571 }
572
573 #[inline]
575 pub fn delta(&self, start: usize, end: usize) -> usize {
576 assert!(start <= end);
577
578 (self.0[end + 1] - self.0[start]).to_usize()
579 }
580}
581
582impl From<&OffsetsBuffer<i32>> for OffsetsBuffer<i64> {
583 fn from(offsets: &OffsetsBuffer<i32>) -> Self {
584 Self(
586 offsets
587 .buffer()
588 .iter()
589 .map(|x| *x as i64)
590 .collect::<Vec<_>>()
591 .into(),
592 )
593 }
594}
595
596impl TryFrom<&OffsetsBuffer<i64>> for OffsetsBuffer<i32> {
597 type Error = PolarsError;
598
599 fn try_from(offsets: &OffsetsBuffer<i64>) -> Result<Self, Self::Error> {
600 i32::try_from(*offsets.last()).map_err(|_| polars_err!(ComputeError: "overflow"))?;
601
602 Ok(Self(
604 offsets
605 .buffer()
606 .iter()
607 .map(|x| *x as i32)
608 .collect::<Vec<_>>()
609 .into(),
610 ))
611 }
612}
613
614impl From<Offsets<i32>> for Offsets<i64> {
615 fn from(offsets: Offsets<i32>) -> Self {
616 Self(
618 offsets
619 .as_slice()
620 .iter()
621 .map(|x| *x as i64)
622 .collect::<Vec<_>>(),
623 )
624 }
625}
626
627impl TryFrom<Offsets<i64>> for Offsets<i32> {
628 type Error = PolarsError;
629
630 fn try_from(offsets: Offsets<i64>) -> Result<Self, Self::Error> {
631 i32::try_from(*offsets.last()).map_err(|_| polars_err!(ComputeError: "overflow"))?;
632
633 Ok(Self(
635 offsets
636 .as_slice()
637 .iter()
638 .map(|x| *x as i32)
639 .collect::<Vec<_>>(),
640 ))
641 }
642}
643
644impl<O: Offset> std::ops::Deref for OffsetsBuffer<O> {
645 type Target = [O];
646
647 #[inline]
648 fn deref(&self) -> &[O] {
649 self.0.as_slice()
650 }
651}
652
653impl<O: Offset> Splitable for OffsetsBuffer<O> {
654 fn check_bound(&self, offset: usize) -> bool {
655 offset <= self.len_proxy()
656 }
657
658 unsafe fn _split_at_unchecked(&self, offset: usize) -> (Self, Self) {
659 let mut lhs = self.0.clone();
660 let mut rhs = self.0.clone();
661
662 lhs.slice(0, offset + 1);
663 rhs.slice(offset, self.0.len() - offset);
664
665 (Self(lhs), Self(rhs))
666 }
667}