libvisiontransfer  4.1.5
reconstruct3d.cpp
1 /*******************************************************************************
2  * Copyright (c) 2017 Nerian Vision Technologies
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a copy
5  * of this software and associated documentation files (the "Software"), to deal
6  * in the Software without restriction, including without limitation the rights
7  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8  * copies of the Software, and to permit persons to whom the Software is
9  * furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *******************************************************************************/
14 
15 #include "reconstruct3d.h"
16 #include "visiontransfer/alignedallocator.h"
17 #include <vector>
18 #include <cstring>
19 #include <algorithm>
20 #include <fstream>
21 #include <stdexcept>
22 
23 // SIMD Headers
24 #ifdef __AVX2__
25 #include <immintrin.h>
26 #elif __SSE2__
27 #include <emmintrin.h>
28 #endif
29 
30 using namespace std;
31 
32 /*************** Pimpl class containing all private members ***********/
33 
34 class Reconstruct3D::Pimpl {
35 public:
36  Pimpl();
37 
38  float* createPointMap(const unsigned short* dispMap, int width, int height,
39  int rowStride, const float* q, unsigned short minDisparity);
40 
41  float* createPointMap(const ImagePair& imagePair, unsigned short minDisparity);
42 
43  void writePlyFile(const char* file, const unsigned short* dispMap,
44  const unsigned char* image, int width, int height,
45  int dispRowStride, int imageRowStride, const float* q,
46  double maxZ);
47 
48  void writePlyFile(const char* file, const ImagePair& imagePair,
49  double maxZ);
50 
51 private:
52  std::vector<float, AlignedAllocator<float> > pointMap;
53 
54  float* createPointMapFallback(const unsigned short* dispMap, int width, int height,
55  int rowStride, const float* q, unsigned short minDisparity);
56 
57  float* createPointMapSSE2(const unsigned short* dispMap, int width, int height,
58  int rowStride, const float* q, unsigned short minDisparity);
59 
60  float* createPointMapAVX2(const unsigned short* dispMap, int width, int height,
61  int rowStride, const float* q, unsigned short minDisparity);
62 };
63 
64 /******************** Stubs for all public members ********************/
65 
67  :pimpl(new Pimpl) {
68 }
69 
70 Reconstruct3D::~Reconstruct3D() {
71  delete pimpl;
72 }
73 
74 float* Reconstruct3D::createPointMap(const unsigned short* dispMap, int width, int height,
75  int rowStride, const float* q, unsigned short minDisparity) {
76  return pimpl->createPointMap(dispMap, width, height, rowStride, q, minDisparity);
77 }
78 
79 float* Reconstruct3D::createPointMap(const ImagePair& imagePair, unsigned short minDisparity) {
80  return pimpl->createPointMap(imagePair, minDisparity);
81 }
82 
83 void Reconstruct3D::writePlyFile(const char* file, const unsigned short* dispMap,
84  const unsigned char* image, int width, int height,
85  int dispRowStride, int imageRowStride, const float* q, double maxZ) {
86  pimpl->writePlyFile(file, dispMap, image, width, height, dispRowStride,
87  imageRowStride, q, maxZ);
88 }
89 
90 void Reconstruct3D::writePlyFile(const char* file, const ImagePair& imagePair,
91  double maxZ) {
92  pimpl->writePlyFile(file, imagePair, maxZ);
93 }
94 
95 /******************** Implementation in pimpl class *******************/
96 
97 Reconstruct3D::Pimpl::Pimpl() {
98 }
99 
100 float* Reconstruct3D::Pimpl::createPointMap(const unsigned short* dispMap, int width,
101  int height, int rowStride, const float* q, unsigned short minDisparity) {
102 
103  // Allocate the buffer
104  if(pointMap.size() != static_cast<unsigned int>(4*width*height)) {
105  pointMap.resize(4*width*height);
106  }
107 
108 # ifdef __AVX2__
109  return createPointMapAVX2(dispMap, width, height, rowStride, q, minDisparity);
110 # elif __SSE2__
111  return createPointMapSSE2(dispMap, width, height, rowStride, q, minDisparity);
112 # else
113  return createPointMapFallback(dispMap, width, height, rowStride, q, minDisparity);
114 # endif
115 }
116 
117 float* Reconstruct3D::Pimpl::createPointMap(const ImagePair& imagePair, unsigned short minDisparity) {
118  if(imagePair.getPixelFormat(1) != ImagePair::FORMAT_12_BIT) {
119  throw std::runtime_error("Disparity map must have 12-bit pixel format!");
120  }
121 
122  return createPointMap(reinterpret_cast<unsigned short*>(imagePair.getPixelData(1)), imagePair.getWidth(),
123  imagePair.getHeight(), imagePair.getRowStride(1), imagePair.getQMatrix(), minDisparity);
124 }
125 
126 float* Reconstruct3D::Pimpl::createPointMapFallback(const unsigned short* dispMap, int width,
127  int height, int rowStride, const float* q, unsigned short minDisparity) {
128  // Code without SSE or AVX optimization
129  float* outputPtr = &pointMap[0];
130  int stride = rowStride / 2;
131 
132  for(int y = 0; y < height; y++) {
133  double qx = q[1]*y + q[3];
134  double qy = q[5]*y + q[7];
135  double qz = q[9]*y + q[11];
136  double qw = q[13]*y + q[15];
137 
138  for(int x = 0; x < width; x++) {
139  unsigned short intDisp = std::max(minDisparity, dispMap[y*stride + x]);
140  if(intDisp >= 0xFFF) {
141  intDisp = minDisparity; // Invalid disparity
142  }
143 
144  double d = intDisp / 16.0;
145  double w = qw + q[14]*d;
146 
147  *outputPtr = static_cast<float>((qx + q[2]*d)/w); // x
148  outputPtr++;
149 
150  *outputPtr = static_cast<float>((qy + q[6]*d)/w); // y
151  outputPtr++;
152 
153  *outputPtr = static_cast<float>((qz + q[10]*d)/w); // z
154  outputPtr+=2; // Consider padding
155 
156  qx += q[0];
157  qy += q[4];
158  qz += q[8];
159  qw += q[12];
160  }
161  }
162  return &pointMap[0];
163 }
164 
165 # ifdef __AVX2__
166 float* Reconstruct3D::Pimpl::createPointMapAVX2(const unsigned short* dispMap, int width,
167  int height, int rowStride, const float* q, unsigned short minDisparity) {
168 
169  // Create column vectors of q
170  const __m256 qCol0 = _mm256_setr_ps(q[0], q[4], q[8], q[12], q[0], q[4], q[8], q[12]);
171  const __m256 qCol1 = _mm256_setr_ps(q[1], q[5], q[9], q[13], q[1], q[5], q[9], q[13]);
172  const __m256 qCol2 = _mm256_setr_ps(q[2], q[6], q[10], q[14], q[2], q[6], q[10], q[14]);
173  const __m256 qCol3 = _mm256_setr_ps(q[3], q[7], q[11], q[15], q[3], q[7], q[11], q[15]);
174 
175  // More constants that we need
176  const __m256i minDispVector = _mm256_set1_epi16(minDisparity);
177  const __m256i maxDispVector = _mm256_set1_epi16(0xFFF);
178  const __m256 scaleVector = _mm256_set1_ps(1.0/16.0);
179  const __m256i zeroVector = _mm256_set1_epi16(0);
180 
181  float* outputPtr = &pointMap[0];
182 
183  for(int y = 0; y < height; y++) {
184  const unsigned char* rowStart = &reinterpret_cast<const unsigned char*>(dispMap)[y*rowStride];
185  const unsigned char* rowEnd = &reinterpret_cast<const unsigned char*>(dispMap)[y*rowStride + 2*width];
186 
187  int x = 0;
188  for(const unsigned char* ptr = rowStart; ptr != rowEnd; ptr += 32) {
189  __m256i disparities = _mm256_load_si256(reinterpret_cast<const __m256i*>(ptr));
190 
191  // Find invalid disparities and set them to 0
192  __m256i validMask = _mm256_cmpgt_epi16(maxDispVector, disparities);
193  disparities = _mm256_and_si256(validMask, disparities);
194 
195  // Clamp to minimum disparity
196  disparities = _mm256_max_epi16(disparities, minDispVector);
197 
198  // Stupid AVX2 unpack mixes everything up! Lets swap the register beforehand.
199  __m256i disparitiesMixup = _mm256_permute4x64_epi64(disparities, 0xd8);
200 
201  // Convert to floats and scale with 1/16
202  __m256 floatDisp = _mm256_cvtepi32_ps(_mm256_unpacklo_epi16(disparitiesMixup, zeroVector));
203  __m256 dispScaled = _mm256_mul_ps(floatDisp, scaleVector);
204 
205  // Copy to array
206 #ifdef _MSC_VER
207  __declspec(align(32)) float dispArray[16];
208 #else
209  float dispArray[16]__attribute__((aligned(32)));
210 #endif
211  _mm256_store_ps(&dispArray[0], dispScaled);
212 
213  // Same for other half
214  floatDisp = _mm256_cvtepi32_ps(_mm256_unpackhi_epi16(disparitiesMixup, zeroVector));
215  dispScaled = _mm256_mul_ps(floatDisp, scaleVector);
216  _mm256_store_ps(&dispArray[8], dispScaled);
217 
218  // Iterate over disparities and perform matrix multiplication for each
219  for(int i=0; i<16; i+=2) {
220  // Create two vectors
221  __m256 vec = _mm256_setr_ps(x, y, dispArray[i], 1.0,
222  x+1, y, dispArray[i+1], 1.0);
223 
224  // Multiply with matrix
225  __m256 u1 = _mm256_shuffle_ps(vec,vec, _MM_SHUFFLE(0,0,0,0));
226  __m256 u2 = _mm256_shuffle_ps(vec,vec, _MM_SHUFFLE(1,1,1,1));
227  __m256 u3 = _mm256_shuffle_ps(vec,vec, _MM_SHUFFLE(2,2,2,2));
228  __m256 u4 = _mm256_shuffle_ps(vec,vec, _MM_SHUFFLE(3,3,3,3));
229 
230  __m256 prod1 = _mm256_mul_ps(u1, qCol0);
231  __m256 prod2 = _mm256_mul_ps(u2, qCol1);
232  __m256 prod3 = _mm256_mul_ps(u3, qCol2);
233  __m256 prod4 = _mm256_mul_ps(u4, qCol3);
234 
235  __m256 multResult = _mm256_add_ps(_mm256_add_ps(prod1, prod2), _mm256_add_ps(prod3, prod4));
236 
237  // Divide by w to receive point coordinates
238  __m256 point = _mm256_div_ps(multResult,
239  _mm256_shuffle_ps(multResult,multResult, _MM_SHUFFLE(3,3,3,3)));
240 
241  // Write result to memory
242  _mm256_store_ps(outputPtr, point);
243 
244  outputPtr += 8;
245  x+=2;
246  }
247  }
248  }
249 
250  return &pointMap[0];
251 }
252 #endif
253 
254 #ifdef __SSE2__
255 float* Reconstruct3D::Pimpl::createPointMapSSE2(const unsigned short* dispMap, int width,
256  int height, int rowStride, const float* q, unsigned short minDisparity) {
257 
258  // Create column vectors of q
259  const __m128 qCol0 = _mm_setr_ps(q[0], q[4], q[8], q[12]);
260  const __m128 qCol1 = _mm_setr_ps(q[1], q[5], q[9], q[13]);
261  const __m128 qCol2 = _mm_setr_ps(q[2], q[6], q[10], q[14]);
262  const __m128 qCol3 = _mm_setr_ps(q[3], q[7], q[11], q[15]);
263 
264  // More constants that we need
265  const __m128i minDispVector = _mm_set1_epi16(minDisparity);
266  const __m128i maxDispVector = _mm_set1_epi16(0xFFF);
267  const __m128 scaleVector = _mm_set1_ps(1.0/16.0);
268  const __m128i zeroVector = _mm_set1_epi16(0);
269 
270  float* outputPtr = &pointMap[0];
271 
272  for(int y = 0; y < height; y++) {
273  const unsigned char* rowStart = &reinterpret_cast<const unsigned char*>(dispMap)[y*rowStride];
274  const unsigned char* rowEnd = &reinterpret_cast<const unsigned char*>(dispMap)[y*rowStride + 2*width];
275 
276  int x = 0;
277  for(const unsigned char* ptr = rowStart; ptr != rowEnd; ptr += 16) {
278  __m128i disparities = _mm_load_si128(reinterpret_cast<const __m128i*>(ptr));
279 
280  // Find invalid disparities and set them to 0
281  __m128i validMask = _mm_cmplt_epi16(disparities, maxDispVector);
282  disparities = _mm_and_si128(validMask, disparities);
283 
284  // Clamp to minimum disparity
285  disparities = _mm_max_epi16(disparities, minDispVector);
286 
287  // Convert to floats and scale with 1/16
288  __m128 floatDisp = _mm_cvtepi32_ps(_mm_unpacklo_epi16(disparities, zeroVector));
289  __m128 dispScaled = _mm_mul_ps(floatDisp, scaleVector);
290 
291  // Copy to array
292 #ifdef _MSC_VER
293  __declspec(align(16)) float dispArray[8];
294 #else
295  float dispArray[8]__attribute__((aligned(16)));
296 #endif
297  _mm_store_ps(&dispArray[0], dispScaled);
298 
299  // Same for other half
300  floatDisp = _mm_cvtepi32_ps(_mm_unpackhi_epi16(disparities, zeroVector));
301  dispScaled = _mm_mul_ps(floatDisp, scaleVector);
302  _mm_store_ps(&dispArray[4], dispScaled);
303 
304  // Iterate over disparities and perform matrix multiplication for each
305  for(int i=0; i<8; i++) {
306  // Create vector
307  __m128 vec = _mm_setr_ps(static_cast<float>(x), static_cast<float>(y), dispArray[i], 1.0);
308 
309  // Multiply with matrix
310  __m128 u1 = _mm_shuffle_ps(vec,vec, _MM_SHUFFLE(0,0,0,0));
311  __m128 u2 = _mm_shuffle_ps(vec,vec, _MM_SHUFFLE(1,1,1,1));
312  __m128 u3 = _mm_shuffle_ps(vec,vec, _MM_SHUFFLE(2,2,2,2));
313  __m128 u4 = _mm_shuffle_ps(vec,vec, _MM_SHUFFLE(3,3,3,3));
314 
315  __m128 prod1 = _mm_mul_ps(u1, qCol0);
316  __m128 prod2 = _mm_mul_ps(u2, qCol1);
317  __m128 prod3 = _mm_mul_ps(u3, qCol2);
318  __m128 prod4 = _mm_mul_ps(u4, qCol3);
319 
320  __m128 multResult = _mm_add_ps(_mm_add_ps(prod1, prod2), _mm_add_ps(prod3, prod4));
321 
322  // Divide by w to receive point coordinates
323  __m128 point = _mm_div_ps(multResult,
324  _mm_shuffle_ps(multResult,multResult, _MM_SHUFFLE(3,3,3,3)));
325 
326  // Write result to memory
327  _mm_store_ps(outputPtr, point);
328 
329  outputPtr += 4;
330  x++;
331  }
332  }
333  }
334 
335  return &pointMap[0];
336 }
337 #endif
338 
339 void Reconstruct3D::Pimpl::writePlyFile(const char* file, const unsigned short* dispMap,
340  const unsigned char* image, int width, int height, int dispRowStride, int imageRowStride,
341  const float* q, double maxZ) {
342 
343  float* pointMap = createPointMap(dispMap, width, height, dispRowStride, q, 1);
344 
345  // Count number of valid points
346  int pointsCount = 0;
347  for(int i=0; i<width*height; i++) {
348  if(pointMap[4*i+2] <= maxZ) {
349  pointsCount++;
350  }
351  }
352 
353  // Write file header
354  fstream strm(file, ios::out);
355  strm << "ply" << endl
356  << "format ascii 1.0" << endl
357  << "element vertex " << pointsCount << endl
358  << "property float x" << endl
359  << "property float y" << endl
360  << "property float z" << endl
361  << "property uchar red" << endl
362  << "property uchar green" << endl
363  << "property uchar blue" << endl
364  << "end_header" << endl;
365 
366  // Write points
367  for(int i=0; i<width*height; i++) {
368  int y = i / width;
369  int x = i % width;
370  int col = static_cast<int>(image[y*imageRowStride + x]);
371 
372  if(pointMap[4*i+2] <= maxZ) {
373  strm << pointMap[4*i]
374  << " " << pointMap[4*i + 1]
375  << " " << pointMap[4*i + 2]
376  << " " << col << " " << col << " " << col << endl;
377  }
378  }
379 }
380 
381 void Reconstruct3D::Pimpl::writePlyFile(const char* file, const ImagePair& imagePair,
382  double maxZ) {
383  if(imagePair.getPixelFormat(0) != ImagePair::FORMAT_8_BIT) {
384  throw std::runtime_error("Camera image must have 8-bit pixel format!");
385  }
386  if(imagePair.getPixelFormat(1) != ImagePair::FORMAT_12_BIT) {
387  throw std::runtime_error("Disparity map must have 12-bit pixel format!");
388  }
389 
390  writePlyFile(file, reinterpret_cast<unsigned short*>(imagePair.getPixelData(1)),
391  imagePair.getPixelData(0), imagePair.getWidth(), imagePair.getHeight(),
392  imagePair.getRowStride(1), imagePair.getRowStride(0), imagePair.getQMatrix(),
393  maxZ);
394 }
float * createPointMap(const unsigned short *dispMap, int width, int height, int rowStride, const float *q, unsigned short minDisparity=1)
Reconstructs the 3D location of each pixel in the given disparity map.
unsigned char * getPixelData(int imageNumber) const
Returns the pixel data for the given image.
Definition: imagepair.h:182
const float * getQMatrix() const
Returns a pointer to the disparity-to-depth mapping matrix q.
Definition: imagepair.h:190
int getWidth() const
Returns the width of each image.
Definition: imagepair.h:147
Reconstruct3D()
Constructs a new object for 3D reconstructing.
A set of two images, which are usually the left camera image and the disparity map.
Definition: imagepair.h:30
ImageFormat getPixelFormat(int imageNumber) const
Returns the pixel format for the given image.
Definition: imagepair.h:171
void writePlyFile(const char *file, const unsigned short *dispMap, const unsigned char *image, int width, int height, int dispRowStride, int imageRowStride, const float *q, double maxZ=std::numeric_limits< double >::max())
Projects the given disparity map to 3D points and exports the result to a PLY file.
8-bit greyscale format
Definition: imagepair.h:37
int getHeight() const
Returns the height of each image.
Definition: imagepair.h:152
int getRowStride(int imageNumber) const
Returns the row stride for the pixel data of one image.
Definition: imagepair.h:160
Nerian Vision Technologies