Tutorial to Use GPU ORB Extractor Feature#
This tutorial shows how to use GPU orb-extractor feature library API.
The GPU orb-extractor feature library offers thread-safe support for both single and multiple cameras.
This tutorial illustrates GPU orb-extractor feature library usage with OpenCV cv::Mat and cv::KeyPoint.
It explains employing multiple CPU threads with multiple ORB extractor objects, as well as using a single orb-extractor feature object to handle multiple camera inputs.
The multithread feature provides more flexibility for visual SLAM to call multiple objects of the orb-extractor feature library.
Note
This tutorial can be run both inside and outside a Docker* image. We assume that the liborb-lze-dev Deb package has been installed, and that the user has copied the tutorial directory from /opt/intel/orb_lze/samples/ to a user-writable directory.
Prepare the environment:
sudo apt install liborb-lze-dev
cp -r /opt/intel/orb_lze/samples/ ~/orb_lze_samples
cd ~/orb_lze_samples/
main.cpp
should be in the directory with following content:1// SPDX-License-Identifier: Apache-2.0 2// Copyright (C) 2025 Intel Corporation 3#include "orb_extractor.h" 4#include "cmd_parser.h" 5#include <opencv2/opencv.hpp> 6#include <opencv2/features2d.hpp> 7#include <opencv2/highgui/highgui.hpp> 8#include <fstream> 9#include <chrono> 10#include <memory> 11#include <thread> 12 13using namespace std; 14 15constexpr uint32_t max_num_keypts_ = 2000; 16constexpr int num_levels_ = 8; 17constexpr int ini_fast_thr_ = 20; 18constexpr int min_fast_thr_ = 7; 19constexpr float scale_factor_ = 1.2f; 20 21struct All_Images 22{ 23 std::string image_title; 24 cv::Mat img; 25}; 26 27std::vector<All_Images> gl_images; 28 29inline double getTimeStamp() 30{ 31 std::chrono::system_clock::duration d = std::chrono::system_clock::now().time_since_epoch(); 32 std::chrono::seconds s = std::chrono::duration_cast<std::chrono::seconds>(d); 33 return s.count() + (std::chrono::duration_cast<std::chrono::microseconds>(d - s).count()) / 1e6; 34} 35 36void extract(int num_cam, const std::string& image_path, const std::string& thread_name, int iterations) 37{ 38 int num_of_cameras = num_cam; 39 std::vector<cv::Mat> all_images; 40 all_images.resize(num_of_cameras); 41 for(int i = 0; i < num_of_cameras; i++) 42 { 43 all_images[i] = cv::imread(image_path, cv::IMREAD_GRAYSCALE); 44 } 45 46 std::vector<std::vector<KeyType>> keypts(num_of_cameras); 47 std::vector<MatType> all_descriptors(num_of_cameras); 48 49#ifdef OPENCV_FREE 50 Mat2d *images = new Mat2d[num_of_cameras]; 51 std::vector<MatType> in_image_array; 52 for( int i = 0; i < num_of_cameras; i++) 53 { 54 images[i] = Mat2d(all_images[i].rows, all_images[i].cols, all_images[i].data); 55 in_image_array.push_back(images[i]); 56 } 57 std::vector<MatType> in_image_mask_array; 58 std::vector<MatType> descriptor_array; 59#else 60 const cv::_InputArray in_image_array(all_images); 61 const cv::_InputArray in_image_mask_array; 62 const cv::_OutputArray 
descriptor_array(all_descriptors); 63#endif 64 65 std::vector<std::vector<float>> mask_rect; 66 67 std::string thread_id = thread_name; 68 69 try 70 { 71 auto extractor = std::make_shared<orb_extractor>(max_num_keypts_, scale_factor_, num_levels_, ini_fast_thr_, min_fast_thr_, num_of_cameras, mask_rect); 72 extractor->set_gpu_kernel_path(ORBLZE_KERNEL_PATH_STRING); 73 74 double total_host_time = 0.0; 75 76 for (int i = 0; i < iterations; i++) 77 { 78 std::cout << "iteration " << i+1 <<"/" << iterations << "\r"; 79 std::cout.flush(); 80 double host_start = getTimeStamp(); 81 extractor->extract(in_image_array, in_image_mask_array, keypts, descriptor_array); 82 double host_end = getTimeStamp(); 83 double host_time_diff = (host_end - host_start)/(float)iterations; 84 total_host_time += host_time_diff; 85 } 86 87 std::cout << "\n" << thread_id << ": gpu host time=" << total_host_time*1000.0 << std::endl; 88 } 89 catch(const std::exception& e) 90 { 91 std::cout << "\n Exception caught:" << e.what(); 92 exit(1); 93 } 94 std::vector<std::vector<cv::KeyPoint>> all_keypts(num_of_cameras); 95 96#ifdef OPENCV_FREE 97 for(int i=0; i < num_of_cameras; i++) 98 { 99 auto& gpu_keypts = keypts.at(i); 100 for (int pt=0; pt < gpu_keypts.size(); pt++) 101 { 102 all_keypts[i].emplace_back(cv::KeyPoint(gpu_keypts[pt].x, gpu_keypts[pt].y, 103 gpu_keypts[pt].size, gpu_keypts[pt].angle, gpu_keypts[pt].response, 104 gpu_keypts[pt].octave, -1)); 105 } 106 } 107#else 108 for(int i=0; i < num_of_cameras; i++) 109 { 110 all_keypts.at(i) = keypts.at(i); 111 } 112#endif 113 114 std::vector<cv::Mat> out; 115 out.resize(num_of_cameras); 116 117 thread_id = thread_id + "_and_"; 118 119 for( int i = 0; i < num_of_cameras; i++) 120 { 121 out.at(i).create(all_images.at(i).rows, all_images.at(i).cols, CV_8U); 122 cv::drawKeypoints(all_images.at(i), all_keypts[i], out[i], cv::Scalar(255,0,0)); 123 char no[20]; 124 sprintf(no,"Img:%d",i+1); 125 All_Images obj; 126 obj.image_title = thread_id + no; 127 
obj.img = out[i]; 128 gl_images.push_back(obj); 129 } 130} 131 132int main(int argc, char** argv) 133{ 134 if(!ParseCommandLine(argc, argv)) 135 { 136 return 0; 137 } 138 139 const int num_images = FLAGS_images; 140 const int num_of_threads = FLAGS_threads; 141 const int num_of_iter = FLAGS_iterations; 142 std::string image_path = FLAGS_image_path; 143 144 std::vector<std::thread> threads; 145 146 for (int i = 0; i < num_of_threads; ++i) 147 { 148 std::string thread_name = "Thread:" + std::to_string(i + 1); 149 threads.emplace_back(extract, num_images, image_path.c_str(), thread_name, num_of_iter); 150 } 151 for (auto& thread : threads) 152 thread.join(); 153 154 //show the images 155 for (int i = 0; i < (num_images * num_of_threads); i++) 156 { 157 cv::imshow(gl_images[i].image_title, gl_images[i].img); 158 } 159 cv::waitKey(0); 160 161 return 0; 162}
Build the code:
mkdir build && cd build
cmake ../
make -j
Run the binary:
./feature_extract -h
Following are the command line arguments:
Usage: ./feature_extract --images=<> --image_path=<> --threads=<>
--images <integer> : Number of images or number of cameras. Default value: 1
--image_path <string> : Path to input image files. Default value: image.jpg
--threads <integer> : Number of threads to run. Default value: 1
--iterations <integer> : Number of iterations to run. Default value: 10
The following command runs four threads, each thread taking a two-camera image input.
./feature_extract --images=2 --threads=4
Expected results example:
./feature_extract --images=2 --threads=4 iteration 10/10 Thread:2: gpu host time=21.4233 iteration 10/10 Thread:1: gpu host time=21.133 iteration 10/10 Thread:4: gpu host time=20.9086 iteration 10/10 Thread:3: gpu host time=20.6155
After executing, the input image will display keypoints in blue color dots.
Note
Here, you can specify the number of images per thread and the number of threads to be executed. You have the option to process multiple image inputs within a single thread's extract API call, or to process one or more image inputs using multiple threads, each making its own extract API call.
Code Explanation#
Configuration for the ORB extractor:
using namespace std;
constexpr uint32_t max_num_keypts_ = 2000;
constexpr int num_levels_ = 8;
constexpr int ini_fast_thr_ = 20;
Initialize the input and output parameters:
{
int num_of_cameras = num_cam;
std::vector<cv::Mat> all_images;
all_images.resize(num_of_cameras);
for(int i = 0; i < num_of_cameras; i++)
{
all_images[i] = cv::imread(image_path, cv::IMREAD_GRAYSCALE);
}
std::vector<std::vector<KeyType>> keypts(num_of_cameras);
std::vector<MatType> all_descriptors(num_of_cameras);
#ifdef OPENCV_FREE
Mat2d *images = new Mat2d[num_of_cameras];
std::vector<MatType> in_image_array;
for( int i = 0; i < num_of_cameras; i++)
{
images[i] = Mat2d(all_images[i].rows, all_images[i].cols, all_images[i].data);
in_image_array.push_back(images[i]);
}
std::vector<MatType> in_image_mask_array;
std::vector<MatType> descriptor_array;
#else
const cv::_InputArray in_image_array(all_images);
const cv::_InputArray in_image_mask_array;
Create the orb_extractor object:
try
Set gpu kernel path: Specify the path to GPU binaries such as gaussian_genx.bin, resize_genx.bin.
{
Note
The macro ORBLZE_KERNEL_PATH_STRING is defined as “/usr/lib/x86_64-linux-gnu” in the header file config.h
.
This header file is installed by the Deb package liborb-lze-dev
at /usr/include/config.h.
Call the extract function to output the keypoints and descriptors for all camera input images. Depending on the number of camera inputs, the orb-extractor feature library returns the number of the keypoints vector and the descriptors vector.
std::cout.flush();
Draw the keypoints on the image. Keypoints are drawn on each image and stored in the corresponding cv::Mat vector.
#endif
std::vector<cv::Mat> out;
out.resize(num_of_cameras);
thread_id = thread_id + "_and_";
for( int i = 0; i < num_of_cameras; i++)
{
out.at(i).create(all_images.at(i).rows, all_images.at(i).cols, CV_8U);
cv::drawKeypoints(all_images.at(i), all_keypts[i], out[i], cv::Scalar(255,0,0));
char no[20];
sprintf(no,"Img:%d",i+1);
All_Images obj;
obj.image_title = thread_id + no;
obj.img = out[i];
Create multiple threads. Each thread will create one orb-extractor feature object.
std::string image_path = FLAGS_image_path;
std::vector<std::thread> threads;
for (int i = 0; i < num_of_threads; ++i)
{
std::string thread_name = "Thread:" + std::to_string(i + 1);
threads.emplace_back(extract, num_images, image_path.c_str(), thread_name, num_of_iter);
}
Display images:
thread.join();
//show the images
for (int i = 0; i < (num_images * num_of_threads); i++)
{
cv::imshow(gl_images[i].image_title, gl_images[i].img);