Open3D (C++ API) 0.19.0
Indexer.h
// ----------------------------------------------------------------------------
// -                        Open3D: www.open3d.org                            -
// ----------------------------------------------------------------------------
// Copyright (c) 2018-2024 www.open3d.org
// SPDX-License-Identifier: MIT
// ----------------------------------------------------------------------------

#pragma once

#include <sstream>

#include "open3d/core/CUDAUtils.h"
#include "open3d/core/Dtype.h"
#include "open3d/core/ShapeUtil.h"
#include "open3d/core/SizeVector.h"
#include "open3d/core/Tensor.h"
#include "open3d/utility/Logging.h"
#include "open3d/utility/MiniVec.h"

// The generated "Indexer_ispc.h" header will not be available outside the
// library. Therefore, forward declare all exported ISPC classes.
#ifdef BUILD_ISPC_MODULE
namespace ispc {
struct TensorRef;
struct Indexer;
}  // namespace ispc
#endif

namespace open3d {
namespace core {

class Indexer;

class IndexerIterator;

// Maximum number of dimensions of TensorRef.
static constexpr int64_t MAX_DIMS = 5;

// Maximum number of inputs of an op.
// MAX_INPUTS shall be >= MAX_DIMS to support advanced indexing.
static constexpr int64_t MAX_INPUTS = 5;

// Maximum number of outputs of an op. This number can be increased when
// necessary.
static constexpr int64_t MAX_OUTPUTS = 2;

template <int NARGS, typename index_t = uint32_t>
struct OffsetCalculator {
    OffsetCalculator(int dims,
                     const int64_t* sizes,
                     const int64_t* const* strides)
        : dims_(dims) {
        if (dims_ > MAX_DIMS) {
            utility::LogError("tensor has too many (>{}) dims_", MAX_DIMS);
        }

        for (int i = 0; i < MAX_DIMS; ++i) {
            if (i < dims_) {
                sizes_[i] = sizes[i];
            } else {
                sizes_[i] = 1;
            }
            for (int arg = 0; arg < NARGS; arg++) {
                strides_[i][arg] = i < dims_ ? strides[arg][i] : 0;
            }
        }
    }

    OPEN3D_HOST_DEVICE utility::MiniVec<index_t, NARGS> get(
            index_t linear_idx) const {
        utility::MiniVec<index_t, NARGS> offsets;
#if defined(__CUDA_ARCH__)
#pragma unroll
#endif
        for (int arg = 0; arg < NARGS; arg++) {
            offsets[arg] = 0;
        }

#if defined(__CUDA_ARCH__)
#pragma unroll
#endif
        for (int dim = 0; dim < MAX_DIMS; ++dim) {
            if (dim == dims_) {
                break;
            }
            index_t mod = linear_idx % sizes_[dim];
            linear_idx = linear_idx / sizes_[dim];

#if defined(__CUDA_ARCH__)
#pragma unroll
#endif
            for (int arg = 0; arg < NARGS; arg++) {
                offsets[arg] += mod * strides_[dim][arg];
            }
        }
        return offsets;
    }

    int dims_;
    index_t sizes_[MAX_DIMS];
    index_t strides_[MAX_DIMS][NARGS];
};
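
// Illustrative sketch (editorial addition, not part of the upstream header):
// OffsetCalculator::get() decomposes a linear workload index into one offset
// per operand, with dim 0 treated as the fastest-varying dimension. The
// shapes, strides and variable names below are hypothetical.
//
//   // Two operands over a {2, 3} iteration space. Operand 0 is dense
//   // (strides {1, 2}); operand 1 is broadcast along dim 0 (strides {0, 2}).
//   const int64_t sizes[2] = {2, 3};
//   const int64_t strides_op0[2] = {1, 2};
//   const int64_t strides_op1[2] = {0, 2};
//   const int64_t* strides[2] = {strides_op0, strides_op1};
//   OffsetCalculator<2> calc(/*dims=*/2, sizes, strides);
//   auto offsets = calc.get(5);  // linear index 5 -> coordinates (1, 2)
//   // offsets[0] == 1 * 1 + 2 * 2 == 5, offsets[1] == 1 * 0 + 2 * 2 == 4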

/// A minimalistic class that references a Tensor.
struct TensorRef {
    // The default copy constructor works on __device__ as well so we don't
    // define it explicitly. shape_[MAX_DIMS] and byte_strides_[MAX_DIMS] will
    // be copied fully.
    TensorRef() : data_ptr_(nullptr), ndims_(0), dtype_byte_size_(0) {}

    TensorRef(const Tensor& t) {
        if (t.NumDims() > MAX_DIMS) {
            utility::LogError("Tensor has too many dimensions {} > {}.",
                              t.NumDims(), MAX_DIMS);
        }
        data_ptr_ = const_cast<void*>(t.GetDataPtr());
        ndims_ = t.NumDims();
        dtype_byte_size_ = t.GetDtype().ByteSize();
        for (int64_t i = 0; i < ndims_; ++i) {
            shape_[i] = t.GetShape(i);
            byte_strides_[i] = t.GetStride(i) * dtype_byte_size_;
        }
    }

    /// Permute (dimension shuffle) the reference to a Tensor.
    void Permute(const SizeVector& dims) {
        // Check that dims is a permutation of [0, 1, 2, ..., n-1].
        if (static_cast<int64_t>(dims.size()) != ndims_) {
            utility::LogError("Number of dimensions mismatch {} != {}.",
                              dims.size(), ndims_);
        }
        std::vector<bool> seen_dims(ndims_, false);
        for (const int64_t& dim : dims) {
            seen_dims[dim] = true;
        }
        if (!std::all_of(seen_dims.begin(), seen_dims.end(),
                         [](bool seen) { return seen; })) {
            utility::LogError(
                    "Permute dims must be a permutation from 0 to {}.",
                    dims.size() - 1);
        }

        // Map to new shape and strides.
        SizeVector new_shape(ndims_);
        SizeVector new_byte_strides(ndims_);
        for (int64_t i = 0; i < ndims_; ++i) {
            int64_t old_dim = shape_util::WrapDim(dims[i], ndims_);
            new_shape[i] = shape_[old_dim];
            new_byte_strides[i] = byte_strides_[old_dim];
        }
        for (int64_t i = 0; i < ndims_; ++i) {
            shape_[i] = new_shape[i];
            byte_strides_[i] = new_byte_strides[i];
        }
    }

    /// Returns True if the underlying memory buffer is contiguous.
    inline bool IsContiguous() const {
        SizeVector shape(ndims_);
        SizeVector strides(ndims_);
        for (int64_t i = 0; i < ndims_; ++i) {
            shape[i] = shape_[i];
            strides[i] = byte_strides_[i] / dtype_byte_size_;
        }
        return shape_util::DefaultStrides(shape) == strides;
    }

    bool operator==(const TensorRef& other) const {
        bool rc = true;
        rc = rc && (data_ptr_ == other.data_ptr_);
        rc = rc && (ndims_ == other.ndims_);
        rc = rc && (dtype_byte_size_ == other.dtype_byte_size_);
        for (int64_t i = 0; i < ndims_; ++i) {
            rc = rc && (shape_[i] == other.shape_[i]);
            rc = rc && (byte_strides_[i] == other.byte_strides_[i]);
        }
        return rc;
    }

    bool operator!=(const TensorRef& other) const { return !(*this == other); }

#ifdef BUILD_ISPC_MODULE
    /// Exports this TensorRef to the ISPC-side TensorRef struct.
    ispc::TensorRef ToISPC() const;
#endif

    void* data_ptr_ = nullptr;
    int64_t ndims_ = 0;
    int64_t dtype_byte_size_ = 0;
    int64_t shape_[MAX_DIMS];
    int64_t byte_strides_[MAX_DIMS];
};
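
// Illustrative sketch (editorial addition): TensorRef captures the data
// pointer, shape and byte strides of a Tensor without owning it, so
// metadata-only operations such as Permute() leave the wrapped Tensor
// untouched. The tensor values below are hypothetical.
//
//   Tensor t = Tensor::Empty({2, 3}, Dtype::Float32);
//   TensorRef ref(t);             // shape {2, 3}, contiguous byte strides
//   ref.Permute({1, 0});          // ref now views t as {3, 2}; t is unchanged
//   bool c = ref.IsContiguous();  // false: strides no longer match {3, 2}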

enum class DtypePolicy {
    NONE,        // Do not check. Expects the kernel to handle the conversion.
                 // E.g. in Copy kernel with type casting.
    ALL_SAME,    // All inputs and outputs have the same dtype.
    INPUT_SAME,  // All inputs have the same dtype.
    INPUT_SAME_OUTPUT_BOOL  // All inputs have the same dtype. Outputs
                            // have bool dtype.
};

/// Maps a flat workload index to the data pointer of the corresponding
/// element of a single Tensor.
class TensorIterator {
public:
    TensorIterator(const Tensor& tensor)
        : input_(TensorRef(tensor)), ndims_(tensor.NumDims()) {}

    OPEN3D_HOST_DEVICE int64_t NumWorkloads() const {
        int64_t num_workloads = 1;
        for (int64_t i = 0; i < ndims_; ++i) {
            num_workloads *= input_.shape_[i];
        }
        return num_workloads;
    }

    OPEN3D_HOST_DEVICE void* GetPtr(int64_t workload_idx) const {
        if (workload_idx < 0 || workload_idx >= NumWorkloads()) {
            return nullptr;
        }
        int64_t offset = 0;
        workload_idx = workload_idx * input_.dtype_byte_size_;
        for (int64_t i = 0; i < ndims_; ++i) {
            offset += workload_idx / input_.byte_strides_[i] *
                      input_.byte_strides_[i];
            workload_idx = workload_idx % input_.byte_strides_[i];
        }
        return static_cast<void*>(static_cast<char*>(input_.data_ptr_) +
                                  offset);
    }

protected:
    TensorRef input_;
    int64_t ndims_;
};
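
// Illustrative sketch (editorial addition): visiting every element of a
// single Tensor through TensorIterator. Assumes a Float32 CPU tensor; the
// variable names are hypothetical.
//
//   Tensor t = Tensor::Ones({2, 3}, Dtype::Float32);
//   TensorIterator it(t);
//   for (int64_t w = 0; w < it.NumWorkloads(); ++w) {
//       float* p = static_cast<float*>(it.GetPtr(w));
//       *p *= 2.0f;
//   }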

class Indexer {
public:
    Indexer() {}
    Indexer(const Indexer&) = default;
    Indexer& operator=(const Indexer&) = default;

    Indexer(const std::vector<Tensor>& input_tensors,
            const Tensor& output_tensor,
            DtypePolicy dtype_policy = DtypePolicy::ALL_SAME,
            const SizeVector& reduction_dims = {});

    Indexer(const std::vector<Tensor>& input_tensors,
            const std::vector<Tensor>& output_tensors,
            DtypePolicy dtype_policy = DtypePolicy::ALL_SAME,
            const SizeVector& reduction_dims = {});

    /// Returns true iff the maximum offsets in bytes are smaller than
    /// 2^31 - 1.
    bool CanUse32BitIndexing() const;

    /// Returns an iterator of Indexers, each of which can be indexed with
    /// 32-bit offsets.
    IndexerIterator SplitTo32BitIndexing() const;

    std::unique_ptr<Indexer> SplitLargestDim();

    Indexer GetPerOutputIndexer(int64_t output_idx) const;

    bool ShouldAccumulate() const { return accumulate_; }

    bool IsFinalOutput() const { return final_output_; }

    void ShrinkDim(int64_t dim, int64_t start, int64_t size);

    /// Returns the number of reduction dimensions.
    int64_t NumReductionDims() const;

    /// Returns number of dimensions of the Indexer.
    int64_t NumDims() const { return ndims_; }

    const int64_t* GetPrimaryShape() const { return primary_shape_; }
    int64_t* GetPrimaryShape() { return primary_shape_; }

    const int64_t* GetPrimaryStrides() const { return primary_strides_; }

    int64_t NumWorkloads() const;

    /// Returns the number of output elements.
    int64_t NumOutputElements() const;

    /// Number of input Tensors.
    int64_t NumInputs() const { return num_inputs_; }

    /// Number of output Tensors.
    int64_t NumOutputs() const { return num_outputs_; }

    /// Returns input TensorRef.
    TensorRef& GetInput(int64_t i) {
        if (i >= num_inputs_ || i < 0) {
            utility::LogError("0 <= i < {} required, however, i = {}.",
                              num_inputs_, i);
        }
        return inputs_[i];
    }
    const TensorRef& GetInput(int64_t i) const {
        if (i >= num_inputs_ || i < 0) {
            utility::LogError("0 <= i < {} required, however, i = {}.",
                              num_inputs_, i);
        }
        return inputs_[i];
    }

    /// Returns output TensorRef.
    TensorRef& GetOutput(int64_t i) {
        if (i >= num_outputs_ || i < 0) {
            utility::LogError("0 <= i < {} required, however, i = {}.",
                              num_outputs_, i);
        }
        return outputs_[i];
    }
    const TensorRef& GetOutput(int64_t i) const {
        if (i >= num_outputs_ || i < 0) {
            utility::LogError("0 <= i < {} required, however, i = {}.",
                              num_outputs_, i);
        }
        return outputs_[i];
    }

    /// Returns the output TensorRef when there is exactly one output.
    /// Equivalent to GetOutput(0).
    TensorRef& GetOutput() {
        if (num_outputs_ > 1) {
            utility::LogError("num_outputs_ == {} > 1, use GetOutput(i)",
                              num_outputs_);
        }
        return GetOutput(0);
    }
    const TensorRef& GetOutput() const {
        if (num_outputs_ > 1) {
            utility::LogError("num_outputs_ == {} > 1, use GetOutput(i)",
                              num_outputs_);
        }
        return GetOutput(0);
    }

    /// Returns true if the dim-th dimension is reduced.
    bool IsReductionDim(int64_t dim) const {
        // All outputs have the same shape and reduction dims. Even if they
        // don't have the same initial strides, the reduced strides are always
        // set to 0. Thus it is okay to use outputs_[0].
        return outputs_[0].byte_strides_[dim] == 0 && primary_shape_[dim] > 1;
    }

    /// Returns the untyped (char*) data pointer of the input_idx-th input at
    /// the given workload index, or nullptr if input_idx is out of range.
    OPEN3D_HOST_DEVICE char* GetInputPtr(int64_t input_idx,
                                         int64_t workload_idx) const {
        if (input_idx < 0 || input_idx >= num_inputs_) {
            return nullptr;
        }
        return GetWorkloadDataPtr(inputs_[input_idx],
                                  inputs_contiguous_[input_idx], workload_idx);
    }

    template <typename T>
    OPEN3D_HOST_DEVICE T* GetInputPtr(int64_t input_idx,
                                      int64_t workload_idx) const {
        if (input_idx < 0 || input_idx >= num_inputs_) {
            return nullptr;
        }
        return GetWorkloadDataPtr<T>(inputs_[input_idx],
                                     inputs_contiguous_[input_idx],
                                     workload_idx);
    }

    /// Returns the untyped (char*) data pointer of the first output at the
    /// given workload index.
    OPEN3D_HOST_DEVICE char* GetOutputPtr(int64_t workload_idx) const {
        return GetWorkloadDataPtr(outputs_[0], outputs_contiguous_[0],
                                  workload_idx);
    }

    template <typename T>
    OPEN3D_HOST_DEVICE T* GetOutputPtr(int64_t workload_idx) const {
        return GetWorkloadDataPtr<T>(outputs_[0], outputs_contiguous_[0],
                                     workload_idx);
    }

    OPEN3D_HOST_DEVICE char* GetOutputPtr(int64_t output_idx,
                                          int64_t workload_idx) const {
        return GetWorkloadDataPtr(outputs_[output_idx],
                                  outputs_contiguous_[output_idx],
                                  workload_idx);
    }

    template <typename T>
    OPEN3D_HOST_DEVICE T* GetOutputPtr(int64_t output_idx,
                                       int64_t workload_idx) const {
        return GetWorkloadDataPtr<T>(outputs_[output_idx],
                                     outputs_contiguous_[output_idx],
                                     workload_idx);
    }

#ifdef BUILD_ISPC_MODULE
    /// Exports this Indexer to the ISPC-side Indexer struct.
    ispc::Indexer ToISPC() const;
#endif

protected:
    /// Merge adjacent dimensions where possible to reduce the number of
    /// dimensions that have to be indexed.
    void CoalesceDimensions();

    // Permute reduction dimensions to front.
    // TODO: Sort the dimensions based on strides in ascending order to improve
    // thread coalescing.
    void ReorderDimensions(const SizeVector& reduction_dims);

    /// Update primary_strides_ based on primary_shape_.
    void UpdatePrimaryStrides();

    /// Update input_contiguous_ and output_contiguous_.
    void UpdateContiguousFlags();

    static void BroadcastRestride(TensorRef& src,
                                  int64_t dst_ndims,
                                  const int64_t* dst_shape);

    static void ReductionRestride(TensorRef& dst,
                                  int64_t src_ndims,
                                  const int64_t* src_shape,
                                  const SizeVector& reduction_dims);

    /// Returns the untyped (char*) data pointer of a TensorRef at the given
    /// workload index.
    OPEN3D_HOST_DEVICE char* GetWorkloadDataPtr(const TensorRef& tr,
                                                bool tr_contiguous,
                                                int64_t workload_idx) const {
        // For a 0-sized input reduction op, the output Tensor is still
        // accessed even though NumWorkloads() == 0, so only negative
        // workload_idx values are rejected here.
        if (workload_idx < 0) {
            return nullptr;
        }
        if (tr_contiguous) {
            return static_cast<char*>(tr.data_ptr_) +
                   workload_idx * tr.dtype_byte_size_;
        } else {
            int64_t offset = 0;
            for (int64_t i = 0; i < ndims_; ++i) {
                offset += workload_idx / primary_strides_[i] *
                          tr.byte_strides_[i];
                workload_idx = workload_idx % primary_strides_[i];
            }
            return static_cast<char*>(tr.data_ptr_) + offset;
        }
    }

    /// Typed variant of GetWorkloadDataPtr. Assumes that sizeof(T) matches
    /// the TensorRef's dtype byte size.
    template <typename T>
    OPEN3D_HOST_DEVICE T* GetWorkloadDataPtr(const TensorRef& tr,
                                             bool tr_contiguous,
                                             int64_t workload_idx) const {
        // For a 0-sized input reduction op, the output Tensor is still
        // accessed even though NumWorkloads() == 0, so only negative
        // workload_idx values are rejected here.
        if (workload_idx < 0) {
            return nullptr;
        }
        if (tr_contiguous) {
            return static_cast<T*>(tr.data_ptr_) + workload_idx;
        } else {
            int64_t offset = 0;
            for (int64_t i = 0; i < ndims_; ++i) {
                offset += workload_idx / primary_strides_[i] *
                          tr.byte_strides_[i];
                workload_idx = workload_idx % primary_strides_[i];
            }
            return static_cast<T*>(static_cast<void*>(
                    static_cast<char*>(tr.data_ptr_) + offset));
        }
    }

    /// Number of input and output Tensors.
    int64_t num_inputs_ = 0;
    int64_t num_outputs_ = 0;

    /// Array of input TensorRefs.
    TensorRef inputs_[MAX_INPUTS];

    /// Array of output TensorRefs.
    TensorRef outputs_[MAX_OUTPUTS];

    /// Array of contiguous flags for all input TensorRefs.
    bool inputs_contiguous_[MAX_INPUTS];

    /// Array of contiguous flags for all output TensorRefs.
    bool outputs_contiguous_[MAX_OUTPUTS];

    /// Indexer's global (broadcasted) shape; its number of elements equals
    /// NumWorkloads().
    int64_t primary_shape_[MAX_DIMS];

    /// The default (contiguous) strides for primary_shape_.
    int64_t primary_strides_[MAX_DIMS];

    /// Indexer's global number of dimensions.
    int64_t ndims_ = 0;

    bool final_output_ = true;

    bool accumulate_ = false;
};
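
// Illustrative sketch (editorial addition): a host-side element-wise kernel
// written against Indexer. The Indexer broadcasts the inputs against the
// output and hands out one typed pointer per operand for each workload. The
// function name and the Float32 assumption are hypothetical.
//
//   void AddFloatCPU(const Tensor& a, const Tensor& b, Tensor& dst) {
//       Indexer indexer({a, b}, dst, DtypePolicy::ALL_SAME);
//       for (int64_t w = 0; w < indexer.NumWorkloads(); ++w) {
//           const float* pa = indexer.GetInputPtr<float>(0, w);
//           const float* pb = indexer.GetInputPtr<float>(1, w);
//           float* pd = indexer.GetOutputPtr<float>(w);
//           *pd = *pa + *pb;
//       }
//   }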

class IndexerIterator {
public:
    struct Iterator {
        Iterator() {}
        Iterator(const Indexer& indexer);
        Iterator(Iterator&& other) = default;

        Indexer& operator*() const;
        Iterator& operator++();
        bool operator==(const Iterator& other) const;
        bool operator!=(const Iterator& other) const;

        std::vector<std::unique_ptr<Indexer>> vec_;
    };

    IndexerIterator(const Indexer& indexer);

    Iterator begin() const;
    Iterator end() const;

private:
    const Indexer& indexer_;
};
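
// Illustrative sketch (editorial addition): when byte offsets may exceed
// 2^31 - 1, an Indexer can be split into 32-bit-addressable pieces and each
// piece dispatched separately (e.g. to a CUDA kernel). Assumes an existing
// `indexer`, such as the one in the sketch after the Indexer class above;
// the launch calls are placeholders.
//
//   if (indexer.CanUse32BitIndexing()) {
//       // Launch the kernel on `indexer` directly.
//   } else {
//       for (Indexer& sub_indexer : indexer.SplitTo32BitIndexing()) {
//           // Launch the kernel on each `sub_indexer`.
//       }
//   }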

}  // namespace core
}  // namespace open3d