
verlab / accelerated_features


Implementation of XFeat (CVPR 2024). Do you need robust and fast local feature extraction? You are in the right place!

Home Page: https://www.verlab.dcc.ufmg.br/descriptors/xfeat_cvpr24

License: Apache License 2.0

Languages: Jupyter Notebook 97.96%, Python 2.04%
Topics: descriptors, image-matching, image-registration, keypoints, lightweight, local-features, real-time

accelerated_features's People

Contributors

felipecadar, guipotje, iamshubhamgupto, qubvel


accelerated_features's Issues

Image registration based on XFeat (C++)

Thanks to @acai66 for the reference code; it has helped a lot. Their code has been modified a bit.

For the following steps I had help from Stack Overflow.

Step 1: Install onnxruntime

mkdir /tmp/onnxInstall
cd /tmp/onnxInstall
wget -O onnx_archive.nupkg https://www.nuget.org/api/v2/package/Microsoft.ML.OnnxRuntime/1.7.0
unzip onnx_archive.nupkg
cp runtimes/linux-x64/native/libonnxruntime.so ~/.local/lib/
cp -r build/native/include/ ~/.local/include/onnxruntime/

Step 2: CMake

Now, if you want find_package(onnxruntime) to work from your CMake project, place the self-created onnxruntime CMake files below in ~/.local/share/cmake/onnxruntime.

Create ~/.local/share/cmake/onnxruntime/onnxruntimeVersion.cmake:

set(PACKAGE_VERSION "1.7.0")
if("${PACKAGE_VERSION}" VERSION_LESS "${PACKAGE_FIND_VERSION}")
  set(PACKAGE_VERSION_COMPATIBLE FALSE)
else()
  set(PACKAGE_VERSION_COMPATIBLE TRUE)
  if("${PACKAGE_VERSION}" VERSION_EQUAL "${PACKAGE_FIND_VERSION}")
    set(PACKAGE_VERSION_EXACT TRUE)
  endif()
endif()

Step 3: Create ~/.local/share/cmake/onnxruntime/onnxruntimeConfig.cmake:

include(FindPackageHandleStandardArgs)

# Assume we are in <prefix>/share/cmake/onnxruntime/onnxruntimeConfig.cmake
get_filename_component(CMAKE_CURRENT_LIST_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH)
get_filename_component(onnxruntime_INSTALL_PREFIX "${CMAKE_CURRENT_LIST_DIR}/../../../" ABSOLUTE)

set(onnxruntime_INCLUDE_DIRS ${onnxruntime_INSTALL_PREFIX}/include)
set(onnxruntime_LIBRARIES onnxruntime)
set(onnxruntime_CXX_FLAGS "") # no flags needed

find_library(onnxruntime_LIBRARY onnxruntime
PATHS "${onnxruntime_INSTALL_PREFIX}/lib"
)

add_library(onnxruntime SHARED IMPORTED)
set_property(TARGET onnxruntime PROPERTY IMPORTED_LOCATION "${onnxruntime_LIBRARY}")
set_property(TARGET onnxruntime PROPERTY INTERFACE_INCLUDE_DIRECTORIES "${onnxruntime_INCLUDE_DIRS}")
set_property(TARGET onnxruntime PROPERTY INTERFACE_COMPILE_OPTIONS "${onnxruntime_CXX_FLAGS}")

find_package_handle_standard_args(onnxruntime DEFAULT_MSG onnxruntime_LIBRARY onnxruntime_INCLUDE_DIRS)

Step 4: Project CMakeLists.txt

cmake_minimum_required(VERSION 3.0)
project(YourProjectName)
#set(OpenCV_DIR "path/to/opencv/build")
include_directories("~/.local/include/onnxruntime/")
find_package(OpenCV REQUIRED)
find_package(onnxruntime)
set(CMAKE_CXX_STANDARD 14)
set(CMAKE_CXX_STANDARD_REQUIRED True)

include_directories(${onnxruntime_INCLUDE_DIRS})
include_directories(${OpenCV_INCLUDE_DIRS})

add_executable(${PROJECT_NAME} demo.cpp)
target_link_libraries(${PROJECT_NAME} ${OpenCV_LIBS})
target_link_libraries(${PROJECT_NAME} "~/.local/lib/libonnxruntime.so")

Step 5: Create a symlink to the onnxruntime shared library

ln -s ~/.local/lib/libonnxruntime.so /usr/local/lib/libonnxruntime.so.1.7.0

The modified C++ code:

#include <chrono>
#include <iostream>
#include "omp.h"

#include <opencv2/opencv.hpp>
#include <onnxruntime_cxx_api.h>


// for onnx model path
const ORTCHAR_T* stringToOrtchar_t(std::string const& s)
{
#ifdef _WIN32
	const char* CStr = s.c_str();
	size_t len = strlen(CStr) + 1;
	size_t converted = 0;
	wchar_t* WStr;
	WStr = (wchar_t*)malloc(len * sizeof(wchar_t));
	mbstowcs_s(&converted, WStr, len, CStr, _TRUNCATE);

	return WStr;
#else
	return s.c_str();
#endif // _WIN32
}

bool initOrtSession(const Ort::Env& env, Ort::Session& session, std::string& modelPath, const int& gpuId = 0)
{
	const ORTCHAR_T* ortModelPath = stringToOrtchar_t(modelPath);

	bool sessionIsAvailable = false;
	/* 
	if (sessionIsAvailable == false)
	{
		try
		{
			Ort::SessionOptions session_options;
			session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL);

			// try Tensorrt 
			OrtTensorRTProviderOptions trtOptions{};
			trtOptions.device_id = gpuId;
			trtOptions.trt_fp16_enable = 1;
			trtOptions.trt_engine_cache_enable = 1;
			trtOptions.trt_engine_cache_path = "./trt_engine_cache";


			trtOptions.trt_max_workspace_size = (size_t)4 * 1024 * 1024 * 1024;

			session_options.AppendExecutionProvider_TensorRT(trtOptions);

			session = Ort::Session(env, ortModelPath, session_options);
			
			sessionIsAvailable = true;
			std::cout << "Using accelerator: Tensorrt" << std::endl;
		}
		catch (Ort::Exception e)
		{
			std::cout << "Exception code: " << e.GetOrtErrorCode() << ", exception: " << e.what() << std::endl;
			std::cout << "Failed to init Tensorrt accelerator, Trying another accelerator..." << std::endl;
			sessionIsAvailable = false;
		}
		catch (...)
		{
			std::cout << "Failed to init Tensorrt accelerator, Trying another accelerator..." << std::endl;
			sessionIsAvailable = false;
		}
	}
	*/

	if (sessionIsAvailable == false)
	{
		try
		{
			Ort::SessionOptions session_options;
			session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL);

			OrtCUDAProviderOptions cuda0ptions;
			cuda0ptions.device_id = gpuId;
			cuda0ptions.cuda_mem_limit = (size_t)4 * 1024 * 1024 * 1024;  // 4 GB; note that 4 << 30 would overflow a 32-bit int

			session_options.AppendExecutionProvider_CUDA(cuda0ptions);

			session = Ort::Session(const_cast<Ort::Env&>(env), ortModelPath, session_options);
			
			sessionIsAvailable = true;
			std::cout << "Using accelerator: CUDA" << std::endl;
		}
		catch (const Ort::Exception& e)
		{
			std::cout << "Exception code: " << e.GetOrtErrorCode() << ", exception: " << e.what() << std::endl;
			std::cout << "Failed to init CUDA accelerator, Trying another accelerator..." << std::endl;
			sessionIsAvailable = false;
		}
		catch (...)
		{
			std::cout << "Failed to init CUDA accelerator, Trying another accelerator..." << std::endl;
			sessionIsAvailable = false;
		}
	}
	if (sessionIsAvailable == false)
	{
		try
		{
			Ort::SessionOptions session_options;
			session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL);

			session = Ort::Session(const_cast<Ort::Env&>(env), ortModelPath, session_options);
			
			sessionIsAvailable = true;
			std::cout << "Using accelerator: CPU" << std::endl;
		}
		catch (const Ort::Exception& e)
		{
			std::cout << "Exception code: " << e.GetOrtErrorCode() << ", exception: " << e.what() << std::endl;
			std::cout << "Failed to init CPU accelerator, Trying another accelerator..." << std::endl;
			sessionIsAvailable = false;
		}
		catch (...)
		{
			std::cout << "Failed to init CPU accelerator." << std::endl;
			sessionIsAvailable = false;
		}
	}

	if (sessionIsAvailable == true)
	{
		Ort::AllocatorWithDefaultOptions allocator;
		// Get input layers count
		size_t num_input_nodes = session.GetInputCount();

		// Get input layer type, shape, name
		for (int i = 0; i < num_input_nodes; i++)
		{
			
			// Name
			std::string input_name = session.GetInputName(i, allocator); //std::string(session.GetInputName(i, allocator).get());

			std::cout << "Input " << i << ": " << input_name << ", shape: (";

			// Type
			Ort::TypeInfo type_info = session.GetInputTypeInfo(i);
			auto tensor_info = type_info.GetTensorTypeAndShapeInfo();

			ONNXTensorElementDataType type = tensor_info.GetElementType();

			// Shape
			std::vector<int64_t> input_node_dims = tensor_info.GetShape();

			for (int j = 0; j < input_node_dims.size(); j++) {
				std::cout << input_node_dims[j];
				if (j == input_node_dims.size() - 1)
				{
					std::cout << ")" << std::endl; 
				}
				else
				{
					std::cout << ", ";
				}
			}
		}

		// Get output layers count
		size_t num_output_nodes = session.GetOutputCount();

		// Get output layer type, shape, name
		for (int i = 0; i < num_output_nodes; i++) {
			// Name
			std::string output_name =  session.GetOutputName(i, allocator);
			std::cout << "Output " << i << ": " << output_name << ", shape: (";

			// type
			Ort::TypeInfo type_info = session.GetOutputTypeInfo(i);
			auto tensor_info = type_info.GetTensorTypeAndShapeInfo();

			ONNXTensorElementDataType type = tensor_info.GetElementType();

			// shape
			std::vector<int64_t> output_node_dims = tensor_info.GetShape();
			for (int j = 0; j < output_node_dims.size(); j++) {
				std::cout << output_node_dims[j];
				if (j == output_node_dims.size() - 1)
				{
					std::cout << ")" << std::endl; 
				}
				else
				{
					std::cout << ", ";
				}
			}
		}
		
	}
	else
	{
		std::cout << modelPath << " is not a valid model." << std::endl;
	}

	return sessionIsAvailable;
}


class XFeat
{
public:
	XFeat(std::string &xfeatModelPath, std::string& matchingModelPath);
	int detectAndCompute(const cv::Mat &image, cv::Mat &mkpts, cv::Mat& feats, cv::Mat& sc);
	int matchStar(const cv::Mat& mkpts0, const cv::Mat& feats0, const cv::Mat& sc0, const cv::Mat& mkpts1, const cv::Mat& feats1, cv::Mat& matches, cv::Mat& batch_indexes);

	~XFeat();

	// gpu id
	int gpuId_ = 0;

	// onnxruntime
	Ort::Env env_{ nullptr };
	Ort::Session xfeatSession_{ nullptr };
	Ort::Session matchingSession_{ nullptr };
	Ort::AllocatorWithDefaultOptions allocator;

	//
	std::vector<const char*> xfeatInputNames = { "images" };
	std::vector<const char*> xfeatOutputNames = { "mkpts", "feats", "sc" };
	std::vector<const char*> matchingInputNames = { "mkpts0", "feats0", "sc0", "mkpts1", "feats1"};
	std::vector<const char*> matchingOutputNames = { "matches", "batch_indexes" };

	bool initFinishedFlag_ = false;
};

XFeat::XFeat(std::string& xfeatModelPath, std::string& matchingModelPath)
{
	const ORTCHAR_T* ortXfeatModelPath = stringToOrtchar_t(xfeatModelPath);
	const ORTCHAR_T* ortMatchingModelPath = stringToOrtchar_t(matchingModelPath);

	env_ = Ort::Env{ OrtLoggingLevel::ORT_LOGGING_LEVEL_FATAL, "xfeat_demo" };  //  ORT_LOGGING_LEVEL_VERBOSE, ORT_LOGGING_LEVEL_FATAL

	std::vector<std::string> availableProviders = Ort::GetAvailableProviders();
	std::cout << "All available accelerators:" << std::endl;
	for (int i = 0; i < availableProviders.size(); i++)
	{
		std::cout << "  " << i + 1 << ". " << availableProviders[i] << std::endl;
	}
	// init sessions
	initOrtSession(env_, xfeatSession_, xfeatModelPath, gpuId_);
	initOrtSession(env_, matchingSession_, matchingModelPath, gpuId_);
}

XFeat::~XFeat()
{
	env_.release();
	xfeatSession_.release();
	matchingSession_.release();
}

int XFeat::detectAndCompute(const cv::Mat& image, cv::Mat& mkpts, cv::Mat& feats, cv::Mat& sc)
{
	// Pre process
	cv::Mat preProcessedImage = cv::Mat::zeros(image.rows, image.cols, CV_32FC3);
	int stride = preProcessedImage.rows * preProcessedImage.cols;
#pragma omp parallel for 
	for (int i = 0; i < stride; i++) // HWC -> CHW, BGR -> RGB
	{
		*((float*)preProcessedImage.data + i) = (float)*(image.data + i * 3 + 2);
		*((float*)preProcessedImage.data + i + stride) = (float)*(image.data + i * 3 + 1);
		*((float*)preProcessedImage.data + i + stride * 2) = (float)*(image.data + i * 3);
	}

	// Create input tensor
	int64_t input_size = preProcessedImage.rows * preProcessedImage.cols * 3;
	std::vector<int64_t> input_node_dims = { 1, 3, preProcessedImage.rows , preProcessedImage.cols };
	auto memory_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
	Ort::Value input_tensor = Ort::Value::CreateTensor<float>(memory_info, (float*)(preProcessedImage.data), input_size, input_node_dims.data(), input_node_dims.size());
	assert(input_tensor.IsTensor());


	// Run session
	auto output_tensors =
		xfeatSession_.Run(Ort::RunOptions{ nullptr }, xfeatInputNames.data(),
			&input_tensor, xfeatInputNames.size(), xfeatOutputNames.data(), xfeatOutputNames.size());
	assert(output_tensors.size() == xfeatOutputNames.size() && output_tensors.front().IsTensor());

	// Get outputs
	auto mkptsShape = output_tensors[0].GetTensorTypeAndShapeInfo().GetShape();
	int dim1 = static_cast<int>(mkptsShape[0]); // 1
	int dim2 = static_cast<int>(mkptsShape[1]); // 4800
	int dim3 = static_cast<int>(mkptsShape[2]); // 2
	float* mkptsDataPtr = output_tensors[0].GetTensorMutableData<float>();
	// To cv::Mat
	mkpts = cv::Mat(dim1, dim2, CV_32FC(dim3), mkptsDataPtr).clone();

	auto featsShape = output_tensors[1].GetTensorTypeAndShapeInfo().GetShape();
	dim1 = static_cast<int>(featsShape[0]); // 1
	dim2 = static_cast<int>(featsShape[1]); // 4800
	dim3 = static_cast<int>(featsShape[2]); // 64
	float* featsDataPtr = output_tensors[1].GetTensorMutableData<float>();
	feats = cv::Mat(dim1, dim2, CV_32FC(dim3), featsDataPtr).clone();

	auto scShape = output_tensors[2].GetTensorTypeAndShapeInfo().GetShape();
	dim1 = static_cast<int>(scShape[0]); // 1
	dim2 = static_cast<int>(scShape[1]); // 4800
	float* scDataPtr = output_tensors[2].GetTensorMutableData<float>();
	sc = cv::Mat(dim1, dim2, CV_32F, scDataPtr).clone();

	return 0;
}

int XFeat::matchStar(const cv::Mat& mkpts0, const cv::Mat& feats0, const cv::Mat& sc0, const cv::Mat& mkpts1, const cv::Mat& feats1, cv::Mat& matches, cv::Mat& batch_indexes)
{
    auto memory_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);

    int64_t mkpts0_size = mkpts0.total() * mkpts0.elemSize();
    std::vector<int64_t> mkpts0_dims = { mkpts0.rows, mkpts0.cols, mkpts0.channels() };
    Ort::Value mkpts0_tensor = Ort::Value::CreateTensor<float>(memory_info, (float*)(mkpts0.data), mkpts0_size, mkpts0_dims.data(), mkpts0_dims.size());

    int64_t feats0_size = feats0.total() * feats0.elemSize();
    std::vector<int64_t> feats0_dims = { feats0.rows, feats0.cols, feats0.channels() };
    Ort::Value feats0_tensor = Ort::Value::CreateTensor<float>(memory_info, (float*)(feats0.data), feats0_size, feats0_dims.data(), feats0_dims.size());

    int64_t sc0_size = sc0.total() * sc0.elemSize();
    std::vector<int64_t> sc0_dims = { sc0.rows, sc0.cols };
    Ort::Value sc0_tensor = Ort::Value::CreateTensor<float>(memory_info, (float*)(sc0.data), sc0_size, sc0_dims.data(), sc0_dims.size());

    int64_t mkpts1_size = mkpts1.total() * mkpts1.elemSize();
    std::vector<int64_t> mkpts1_dims = { mkpts1.rows, mkpts1.cols, mkpts1.channels() };
    Ort::Value mkpts1_tensor = Ort::Value::CreateTensor<float>(memory_info, (float*)(mkpts1.data), mkpts1_size, mkpts1_dims.data(), mkpts1_dims.size());

    int64_t feats1_size = feats1.total() * feats1.elemSize();
    std::vector<int64_t> feats1_dims = { feats1.rows, feats1.cols, feats1.channels() };
    Ort::Value feats1_tensor = Ort::Value::CreateTensor<float>(memory_info, (float*)(feats1.data), feats1_size, feats1_dims.data(), feats1_dims.size());

    // Create input tensors
    std::vector<Ort::Value> input_tensors;
    input_tensors.push_back(std::move(mkpts0_tensor));
    input_tensors.push_back(std::move(feats0_tensor));
    input_tensors.push_back(std::move(sc0_tensor));
    input_tensors.push_back(std::move(mkpts1_tensor));
    input_tensors.push_back(std::move(feats1_tensor));

    // Run session
    auto output_tensors =
        matchingSession_.Run(Ort::RunOptions{ nullptr }, matchingInputNames.data(),
            input_tensors.data(), input_tensors.size(), matchingOutputNames.data(), matchingOutputNames.size());
    
    // Check output tensors
    if (output_tensors.size() != matchingOutputNames.size() || !output_tensors.front().IsTensor()) {
        std::cerr << "Error: Output tensor size mismatch or output is not a tensor." << std::endl;
        return -1;
    }

    // Get outputs
    auto matchesShape = output_tensors[0].GetTensorTypeAndShapeInfo().GetShape();
    int dim1 = static_cast<int>(matchesShape[0]); // num
    int dim2 = static_cast<int>(matchesShape[1]); // 4
    // To cv::Mat
    float* matchesDataPtr = output_tensors[0].GetTensorMutableData<float>();
    matches = cv::Mat(dim1, dim2, CV_32F, matchesDataPtr).clone();

    auto batch_indexesShape = output_tensors[1].GetTensorTypeAndShapeInfo().GetShape();
    dim1 = static_cast<int>(batch_indexesShape[0]); // num

    float* batch_indexesDataPtr = output_tensors[1].GetTensorMutableData<float>();
    batch_indexes = cv::Mat(dim1, 1, CV_32F, batch_indexesDataPtr).clone();

    return 0;
}

cv::Mat warpCornersAndDrawMatches(const std::vector<cv::Point2f>& refPoints, const std::vector<cv::Point2f>& dstPoints,
	const cv::Mat& img1, const cv::Mat& img2)
{
	// Step 1: Calculate the Homography matrix and mask
	cv::Mat mask;
	cv::Mat H = cv::findHomography(refPoints, dstPoints, cv::RANSAC, 3.5, mask, 1000, 0.999);
	mask = mask.reshape(1, mask.total());  // Flatten the mask

	// Step 2: Get corners of the first image (img1)
	std::vector<cv::Point2f> cornersImg1 = { cv::Point2f(0, 0), cv::Point2f(img1.cols - 1, 0),
											cv::Point2f(img1.cols - 1, img1.rows - 1), cv::Point2f(0, img1.rows - 1) };
	std::vector<cv::Point2f> warpedCorners(4);

	// Step 3: Warp corners to the second image (img2) space
	cv::perspectiveTransform(cornersImg1, warpedCorners, H);

	// Step 4: Draw the warped corners in image2
	cv::Mat img2WithCorners = img2.clone();
	for (size_t i = 0; i < warpedCorners.size(); i++) {
		cv::line(img2WithCorners, warpedCorners[i], warpedCorners[(i + 1) % 4], cv::Scalar(0, 255, 0), 4);
	}

	// Step 5: Prepare keypoints and matches for drawMatches function
	std::vector<cv::KeyPoint> keypoints1, keypoints2;
	std::vector<cv::DMatch> matches;
	for (size_t i = 0; i < refPoints.size(); i++) {
		if (mask.at<uchar>(i)) {  // Only consider inliers
			keypoints1.emplace_back(refPoints[i], 5);
			keypoints2.emplace_back(dstPoints[i], 5);
		}
	}
	for (size_t i = 0; i < keypoints1.size(); i++) {
		matches.emplace_back(i, i, 0);
	}

	// Draw inlier matches
	cv::Mat imgMatches;
	cv::drawMatches(img1, keypoints1, img2WithCorners, keypoints2, matches, imgMatches, cv::Scalar(0, 255, 0), cv::Scalar::all(-1), std::vector<char>(), cv::DrawMatchesFlags::NOT_DRAW_SINGLE_POINTS);

	return imgMatches;
}

// Helper function to draw keypoints
cv::Mat drawKeypoints(const cv::Mat& img, const cv::Mat& mkpts) {
    cv::Mat imgWithKeypoints = img.clone();
    for (int i = 0; i < mkpts.rows; ++i) {
        cv::Point2f pt(mkpts.at<float>(i, 0), mkpts.at<float>(i, 1));
        cv::circle(imgWithKeypoints, pt, 5, cv::Scalar(0, 0, 255), -1);
    }
    return imgWithKeypoints;
}


int main()
{
    std::string xfeatModelPath = "/home/rack_dl/image_registration/xfeat/xfeat_dualscale.onnx";
    std::string matchingModelPath = "/home/rack_dl/image_registration/xfeat/matching.onnx";
    cv::Mat image0 = cv::imread("/home/rack_dl/register/xfeat/6.jpg");
    cv::Mat image1 = cv::imread("/home/rack_dl/register/xfeat/5.jpg");
    cv::Mat mkpts0, feats0, sc0;
    cv::Mat mkpts1, feats1, sc1;
    cv::Mat matches, batch_indexes;

    // Init xfeat object
    XFeat xfeat(xfeatModelPath, matchingModelPath);

    // Extract features
    xfeat.detectAndCompute(image0, mkpts0, feats0, sc0);
    xfeat.detectAndCompute(image1, mkpts1, feats1, sc1);

    // Matching and refine
    xfeat.matchStar(mkpts0, feats0, sc0, mkpts1, feats1, matches, batch_indexes);

    // Print results
    std::cout << "matches: " << matches.rows << "x" << matches.cols << "x" << matches.channels() << std::endl;
    std::cout << "batch_indexes: " << batch_indexes.rows << "x" << batch_indexes.cols << "x" << batch_indexes.channels() << std::endl;

    // Get points
    std::vector<cv::Point2f> points0, points1;
    for (int i = 0; i < matches.rows; i++) {
        points0.push_back(cv::Point2f(*((float*)matches.data + i * 4), *((float*)matches.data + i * 4 + 1)));
        points1.push_back(cv::Point2f(*((float*)matches.data + i * 4 + 2), *((float*)matches.data + i * 4 + 3)));
    }
    
    cv::Mat homography, transformed_img;
    homography = cv::findHomography(points0, points1, cv::RANSAC);
                
    if (homography.empty())
    {
        std::cout << "Homography estimation failed (empty matrix)." << std::endl;
        return -1;
    }

    cv::warpPerspective(image0, transformed_img, homography, image1.size());
    

    // Visualization
    cv::Mat drawImage = warpCornersAndDrawMatches(points0, points1, image0, image1);
    

    // Display images
    //cv::imshow("Detected Keypoints Image0", drawKeypoints(image0, mkpts0));
    //cv::imshow("Detected Keypoints Image1", drawKeypoints(image1, mkpts1));
    //cv::imshow("Matches", drawImage);
    cv::imshow("Registered", transformed_img);
    cv::waitKey();

    return 0;
}

Change the model and image paths accordingly.
Download only the models from this link:
XFeat

Just take the models from the link above; the script provided here is a modified version.
Once again, thanks for the reference code.

Training on custom datasets

I encountered a problem while creating my own megadepth_indices. I consulted the LoFTR instructions and D2-Net, but still couldn't find a way to create them. Can you provide the code for creating them?

XFeat + GIM

Hi,

Thanks for your amazing work!

Have you noticed this GIM work: https://xuelunshen.com/gim/ ? It seems that using internet videos to train the feature & matching networks is very powerful.

Do you have any plan to retrain the xfeat using their approach?

Best regards.

some questions

Thank you for the good work. Some questions follow:
(1) I am training on my own data; the raw image size is 128*128, so I revised the model to get a larger feature map.
(2) In self._unfold2d(x, ws=8), the 8 is a fixed parameter, but keypoint_position_loss and coordinate_classification_loss still contain an 8. Is it the same parameter, and must both be modified at the same time?
(3) generateRandomTPS has grid (8,6); what does that mean, and does it relate to ws=8 in (2)?
(4) acc_f is always nan in my training; is that normal?
Loss: 7.6677 acc_c0 0.314 acc_c1 0.156 acc_f: nan loss_c: 5.476 loss_f: 8.002 loss_kp: 0.065 #matches_c: 64 loss_kp_pos: 16.468 acc_kp_pos: 0.030

Input is RGB or BGR?

Hi @guipotje, I have another question.
In the training code you use cv2.imread to read images. However, cv2.imread returns images in BGR order by default. Is this a bug?
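If the color order turns out to matter, a quick check (a minimal sketch using standard OpenCV; the file name is just a placeholder and this is not part of the original training code) is to convert to RGB right after reading and compare the two results:

import cv2

img_bgr = cv2.imread("example.jpg")                  # OpenCV default: BGR channel order
img_rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)   # explicit conversion to RGB

# If the network was trained on images read with cv2.imread and no conversion,
# feeding RGB at inference time would introduce a (usually small) train/test mismatch.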

Minor typo in Section 3.1 of the paper

In Eq. (1), the $H_i ∗ W_i$ terms emerge as the primary computational bottleneck [...]

is probably meant to use \cdot instead of *, i.e.

In Eq. (1), the $H_i \cdot W_i$ terms emerge as the primary computational bottleneck [...]

in Section 3.1 of the paper. Also, there's an

accross

later in the section that contains one superfluous "c".

How to backpropagate through this work?

I want to use mkpts_0, mkpts_1 = xfeat.match_xfeat(im1, im2, top_k = 4096) to optimize different targets based on the distance between matches.
However, even if I remove the inference-only parts, back-propagation doesn't seem to work well.
Could you please tell me what I should do?

Thank you for your tremendous work.

Best regards
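One possible workaround, sketched below under the assumption that the goal is to optimize a transform from the distances between matched points (this is not the authors' method): treat the matched coordinates returned by match_xfeat as fixed observations and put the learnable parameters in a differentiable transform that acts on them, so gradients flow through the transform rather than through the non-differentiable keypoint extraction.

import torch

# mkpts_0, mkpts_1: (N, 2) numpy arrays from xfeat.match_xfeat(im1, im2, top_k=4096)
p0 = torch.from_numpy(mkpts_0).float()   # treated as constants (no grad to the network)
p1 = torch.from_numpy(mkpts_1).float()

# Example target: a learnable 2D affine transform (illustrative choice).
A = torch.eye(2, requires_grad=True)
t = torch.zeros(2, requires_grad=True)
opt = torch.optim.Adam([A, t], lr=1e-2)

for _ in range(200):
    opt.zero_grad()
    pred = p0 @ A.T + t                            # warp the keypoints of image 1
    loss = torch.nn.functional.mse_loss(pred, p1)  # distance between matches
    loss.backward()                                # gradients reach A and t only
    opt.step()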

WIP combining xfeat with steerers

I have a WIP of combining xfeat with steerers. I'm posting here in case someone other than me is interested in looking at this.

I've trained two versions of XFeat using this fork, one with a fixed permutation steerer and one with a learned steerer (this seems to be slightly better). A quick colab demo for the fixed steerer is here. Weights are here. I haven't really evaluated well (when will your evaluation code be released approximately?), but on a quick test on HPatches, the new versions seem comparable to the original on upright images.

Anyway, happy to discuss ideas in this thread.

How does this method perform if training without Reliability Map?

Hello. Thank you for your nice work and contribution to the field.

I have some questions.

  1. How does this method perform if training without Reliability Map?
  2. Since the method detects keypoints with a simple network (only four 1×1 convolutions), did you validate any keypoint-detection metric, e.g. repeatability?

I look forward to your reply and thank you for your excellent work!

Set up multiple GPUs during training

Hello, I set multiple GPUs like this: --device_num '0,1,2,3,4,5,6,7,8', but still only GPU 0 was used; it seems the setting has no effect.

Requesting a constants.pkl file for pre trained models

XFeat has been very helpful to me. Recently, I have been porting it to C++ with libtorch. During the testing phase, when loading the pretrained model, the following error occurred:
"terminate called after throwing an instance of 'c10::Error'"
what(): PytorchStreamReader failed locating file constants.pkl: file not found

So I would like to request a constants.pkl file for the pretrained model. Looking forward to your reply.

using smaller resolution for training

Hi @guipotje
I want to use 256x256 images for training; the original image size of my dataset is 512x512. During training, will the images be automatically resized to 256x256? Could you tell me the correct way to set the parameters?
What I have done is to set training_res = (256, 256) and grid = (5, 5) in generateRandomTPS. But then the keypoint_pos loss is high (about 20), the reliability is always increasing, and Accuracy/kp_position is always decreasing. This didn't happen when I set training_res = (512, 512). Thank you in advance.

acc_f nan

Hello, I am using the description in the README to reproduce your code.

During this process, I encountered some issues. I don't know why the acc_f is showing up as nan. I made some changes to the code and I'm not sure if these changes are related to the acc_f being nan.

I made the following changes:

  1. On line 161 of 'augumentation.py', I changed im = np.rot90(im) to im = cv2.rotate(im, cv2.ROTATE_90_CLOCKWISE) to avoid errors during training.
  2. In utils.py, line 36, I changed data = np.array(h5py.File(f, 'r')['/depth']) to data = np.array(h5py.File(f, 'r')['/depth'], dtype=np.float32) to avoid errors during training.

Besides the changes mentioned, I did not modify anything else, but acc_f still shows nan. Could you explain why this might be happening?

Multi-modality?

Great work on this project, it looks very promising! What are your thoughts on using this architecture to find keypoints and descriptors across modalities? This paper attempts to match features across RGB and infrared images. Do you see any reasons why this might not work for your architecture?

Xfeat + LighterGlue (Google Colab)

Hello!

I am using the provided XFeat + LighterGlue Google Colab to test this new model. However, I am not sure Google Colab is loading the correct model. It seems that it is downloading xfeat.pt instead of xfeat-lighterglue.pt
Downloading: "https://github.com/verlab/accelerated_features/raw/main/weights/xfeat.pt" to /root/.cache/torch/hub/checkpoints/xfeat.pt

Is this ok?

Thanks!

UserWarning: torch.meshgrid

UserWarning: torch.meshgrid: in an upcoming release, it will be required to pass the indexing argument. (Triggered internally at ..\aten\src\ATen\native\TensorShape.cpp:2157.)
return _VF.meshgrid(tensors, **kwargs) # type: ignore[attr-defined]

../accelerated_features/third_party/alike_wrapper.py line 76
../accelerated_features/third_party/ALIKE/soft_detect.py line 88
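For reference, the warning goes away if the indexing argument is passed explicitly; indexing='ij' reproduces the legacy default, so the outputs are unchanged (a sketch of the one-line change, not an official patch to the third-party code):

import torch

ys, xs = torch.meshgrid(torch.arange(4), torch.arange(5), indexing='ij')
# identical result to the old torch.meshgrid(torch.arange(4), torch.arange(5)),
# but without the UserWarning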

Training-related issues

Could you explain how the model is trained on COCO since it doesn’t have labels? Is it by performing geometric transformations on the images and then ensuring that the feature points extracted by the model from the original and transformed images are the same?
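For what it's worth, the general self-supervised recipe described above can be sketched as follows: warp an unlabeled image with a random synthetic transform and use the known transform itself as the ground-truth correspondence. This is a minimal homography-only illustration of the idea; the repository's actual training pipeline uses its own augmentations (e.g. TPS warps), and the file name below is a placeholder.

import cv2
import numpy as np

img = cv2.imread("coco_example.jpg")           # any unlabeled image
h, w = img.shape[:2]

# Random homography from a small perturbation of the image corners.
src = np.float32([[0, 0], [w, 0], [w, h], [0, h]])
dst = (src + np.random.uniform(-0.15, 0.15, src.shape) * [w, h]).astype(np.float32)
H = cv2.getPerspectiveTransform(src, dst)
warped = cv2.warpPerspective(img, H, (w, h))

# For any pixel (x, y) in img, its ground-truth location in warped follows from H,
# so keypoints/descriptors from the two views can be supervised without labels.
pts = np.random.uniform(0, [w, h], (100, 2)).astype(np.float32)
pts_warped = cv2.perspectiveTransform(pts[None], H)[0]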

Eval-MegaDepth-1500

Hello, I evaluated the method on MegaDepth following the LoFTR evaluation, but the results are not good, and I can't figure out the problem for now. Looking forward to your assessment.

Does XFeat work for two similar images but one of them is upsidedown?

I just found that for two similar (or identical) images, if one of them is upside down, the matching points are horrible. It doesn't work like SIFT, which matches features between both images and can rotate them back when aligning the two.

Am I wrong here, or do you have any suggestions?

Otherwise, it is fast and accurate when the two images have the same orientation.

Thanks
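As a possible workaround (a rough sketch, not a feature of the repository): since the detector is trained on mostly upright images and is not rotation-invariant, one can try the four 90-degree rotations of the second image and keep the one with the most RANSAC inliers, then undo the chosen rotation on the resulting keypoints. This assumes the xfeat.match_xfeat API quoted elsewhere in this thread.

import cv2
import numpy as np

best = None
for k in range(4):                                      # 0, 90, 180, 270 degrees CCW
    im2_rot = np.ascontiguousarray(np.rot90(im2, k))
    mk0, mk1 = xfeat.match_xfeat(im1, im2_rot, top_k=4096)
    if len(mk0) < 4:
        continue
    H, mask = cv2.findHomography(mk0, mk1, cv2.RANSAC, 3.5)
    inliers = int(mask.sum()) if mask is not None else 0
    if best is None or inliers > best[0]:
        best = (inliers, k, mk0, mk1)

if best is not None:
    print("best rotation (x90 deg CCW):", best[1], "inliers:", best[0])
    # Note: best[3] is in the rotated image's coordinates and must be mapped back
    # to im2's frame before drawing or warping.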

revise self._unfold2d(x, ws=8)?

Hi, I trained on my own data (image size about 128*128) and changed the model as follows:

	self.block1 = nn.Sequential(
									BasicLayer( 1,  8, stride=1),
									BasicLayer( 8, 24, stride=1),
									BasicLayer(24, 64, stride=1),
								 )

	self.block2 = nn.Sequential(
									BasicLayer(64, 64, stride=2),
									BasicLayer(64, 64, stride=1),
									BasicLayer(64, 64, stride=1),
								 )

	self.block3 = nn.Sequential(
									BasicLayer( 64, 128, stride=2),
									BasicLayer(128, 128, stride=1),
									BasicLayer(128, 128, stride=1),
									BasicLayer(128,  64, 1, padding=0),
								 )

	self.block_fusion =  nn.Sequential(
									BasicLayer(64, 64, stride=1),
									BasicLayer(64, 64, stride=1),
									nn.Conv2d (64, 64, 1, padding=0)
								 )

	self.heatmap_head = nn.Sequential(
									BasicLayer(64, 64, 1, padding=0),
									BasicLayer(64, 64, 1, padding=0),
									nn.Conv2d (64, 1, 1),
									nn.Sigmoid()
								)

	self.keypoint_head = nn.Sequential(
									BasicLayer(4, 64, 1, padding=0),
									BasicLayer(64, 64, 1, padding=0),
									BasicLayer(64, 64, 1, padding=0),
									nn.Conv2d (64, 5, 1),
								)

and the forward pass was changed as follows:

	def forward(self, x):
		"""
			input:
				x -> torch.Tensor(B, C, H, W) grayscale or rgb images
			return:
				feats     -> torch.Tensor(B, 64, H/8, W/8) dense local features
				keypoints -> torch.Tensor(B, 65, H/8, W/8) keypoint logit map
				heatmap   -> torch.Tensor(B,  1, H/8, W/8) reliability map
		"""
		# don't backprop through normalization
		with torch.no_grad():
			x = x.mean(dim=1, keepdim=True)
			x = self.norm(x)

		# main backbone
		x1 = self.block1(x)
		x2 = self.block2(x1)
		x3 = self.block3(x2)
		x4 = F.interpolate(x3, (x2.shape[-2], x2.shape[-1]), mode='bilinear')
		feats = self.block_fusion(x4 + x2)

		# heads
		heatmap = self.heatmap_head(feats)                        # reliability map
		keypoints = self.keypoint_head(self._unfold2d(x, ws=2))   # keypoint map logits
		return feats, keypoints, heatmap

The _unfold2d ws was changed to 2. How should keypoint_head be revised, and how should losses.py be revised accordingly?
Thank you

Extract feature points manually

I have a task: I want to use the center-point coordinates of object-detection boxes as the feature-point input, so as to match them to the next frame and achieve tracking. How are the feature points of XFeat learned? Methods such as COTR can do this, but they are inefficient.
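A rough sketch of one way to get tracking with the public API (the frame and box variables are hypothetical inputs; this filters dense matches by the detection box rather than feeding the box center to the network): match consecutive frames with match_xfeat, keep only matches whose source keypoint lies inside the box, and move the box by the median displacement.

import numpy as np

# frame_t, frame_t1: consecutive frames; box = (x1, y1, x2, y2) from the detector
mk0, mk1 = xfeat.match_xfeat(frame_t, frame_t1, top_k=4096)

x1, y1, x2, y2 = box
inside = (mk0[:, 0] >= x1) & (mk0[:, 0] <= x2) & (mk0[:, 1] >= y1) & (mk0[:, 1] <= y2)

if inside.sum() >= 3:
    # Robust box motion estimate: median displacement of the matched keypoints inside the box.
    dx, dy = np.median(mk1[inside] - mk0[inside], axis=0)
    box_next = (x1 + dx, y1 + dy, x2 + dx, y2 + dy)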

Use both: keypoints + refinementMLP ?

I can see that match_xfeat and match_xfeat_star use different strategies for fine-grained feature matching:

match_xfeat

On one hand, match_xfeat uses the model's keypoints output (fine-grained heatmap) to first extract the mkpts and then uses those mkpts to bicubically interpolate the descriptors (aka feats) at the correct offset.

match_xfeat_star

On the other hand, match_xfeat_star totally ignores the model's keypoints output. match_xfeat_star gets the correct offsets by using the refinementMLP on the raw model feats (no feats interpolation).

My idea: Use both for better performance?

Method              Use keypoints and interpolate feats    Use refinementMLP to offset mkpts
match_xfeat         yes                                    no
match_xfeat_star    no                                     yes
Use both            yes                                    yes

My idea to (maybe) achieve a better accuracy is to use the refinementMLP on the bicubic-interpolated feats to further correct the offset.
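A conceptual sketch of the hybrid idea (all names and shapes below are assumptions for illustration; the real refinement module lives inside the match_xfeat_star pipeline and may expect different inputs): detect keypoints and bicubically interpolate descriptors as match_xfeat does, then feed the concatenated matched descriptor pairs to a refinement MLP and add its predicted sub-pixel offset to the target keypoints.

import torch

def match_xfeat_hybrid(desc0, desc1, mkpts0, mkpts1, refinement_mlp):
    """Hypothetical hybrid of match_xfeat and match_xfeat_star.

    desc0/desc1: (N, 64) descriptors already bicubically interpolated at the keypoints,
    mkpts0/mkpts1: (N, 2) matched keypoints, refinement_mlp: a module (assumed here to
    map the concatenated pair directly to a 2D offset) playing the role of the
    refinement MLP used by match_xfeat_star.
    """
    offsets = refinement_mlp(torch.cat([desc0, desc1], dim=-1))  # (N, 2), assumed output
    return mkpts0, mkpts1 + offsets                              # sub-pixel refined targets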

Usage on multimodal data

Nice work!

Did you already try some tests with multimodal data, such as depth images and thermal?

Training XFEAT

I am very grateful for your comprehensive and well-organized repository on GitHub.
When do you plan to release the code for training networks for custom applications?

XFeat + LightGlue

Hello everyone,

I'm training some LightGlue variations (finding a neat trade-off between model size and accuracy), and I will update the repo with the model and weights in the next few weeks!

You can follow this issue if you are interested.

Best,

Guilherme

training

Hello, I want to do some fine-tuning based on your trained model and my own dataset. Since there are no labels, I want to replace the coco20k dataset with my own dataset, or add some of my own data to it. Do you recommend "full training" or "training from the existing weights"? Do I need to modify the training hyperparameters and strategies? Looking forward to your reply.

The code performs poorly

I found that without LightGlue, the matching results are very poor (screenshot: 2024-08-20_21-30-47).

Besides, when I visualized the heatmap, the result was also very poor (screenshot: 2024-08-20_21-32-55).

The imshow code is:

@torch.inference_mode()
	def detectAndCompute(self, x, top_k = None, detection_threshold = None):
		"""
			Compute sparse keypoints & descriptors. Supports batched mode.

			input:
				x -> torch.Tensor(B, C, H, W): grayscale or rgb image
				top_k -> int: keep best k features
			return:
				List[Dict]: 
					'keypoints'    ->   torch.Tensor(N, 2): keypoints (x,y)
					'scores'       ->   torch.Tensor(N,): keypoint scores
					'descriptors'  ->   torch.Tensor(N, 64): local features
		"""
		if top_k is None: top_k = self.top_k
		if detection_threshold is None: detection_threshold = self.detection_threshold
		x, rh1, rw1 = self.preprocess_tensor(x)

		B, _, _H1, _W1 = x.shape
        
		M1, K1, H1 = self.net(x)
		mod = torch.jit.trace(self.net, x)
		torch.jit.save(mod,"feature.pt")
		M1 = F.normalize(M1, dim=1)

		#Convert logits to heatmap and extract kpts
		K1h = self.get_kpts_heatmap(K1)

		array1 = K1h.numpy()  # convert tensor to numpy
		maxValue = array1.max()
		array1 = array1 * 255  # scale values to [0, 255]
		mat = np.uint8(array1)  # float32 --> uint8
		print('mat_shape:', mat.shape)  # mat_shape: (3, 982, 814)
		mat = mat[0, :, :, :].transpose(1, 2, 0)  # mat_shape: (982, 814, 3)
		cv2.imshow("img", mat)

		array2 = H1.numpy()  # convert tensor to numpy
		maxValue = array2.max()
		print(maxValue)
		#array2 = array2 * 255 / maxValue  # scale values to [0, 255]
		array2 = array2 * 255  # scale values to [0, 255]
		mat = np.uint8(array2)  # float32 --> uint8
		print('mat_shape:', mat.shape)  # mat_shape: (3, 982, 814)
		mat = mat[0, :, :, :].transpose(1, 2, 0)  # mat_shape: (982, 814, 3)
		cv2.imshow("img2", mat)
		cv2.waitKey()

		mkpts = self.NMS(K1h, threshold=detection_threshold, kernel_size=5)

		#Compute reliability scores
		_nearest = InterpolateSparse2d('nearest')
		_bilinear = InterpolateSparse2d('bilinear')
		scores = (_nearest(K1h, mkpts, _H1, _W1) * _bilinear(H1, mkpts, _H1, _W1)).squeeze(-1)
		scores[torch.all(mkpts == 0, dim=-1)] = -1

		#Select top-k features
		idxs = torch.argsort(-scores)
		mkpts_x  = torch.gather(mkpts[...,0], -1, idxs)[:, :top_k]
		mkpts_y  = torch.gather(mkpts[...,1], -1, idxs)[:, :top_k]
		mkpts = torch.cat([mkpts_x[...,None], mkpts_y[...,None]], dim=-1)
		scores = torch.gather(scores, -1, idxs)[:, :top_k]

		#Interpolate descriptors at kpts positions
		feats = self.interpolator(M1, mkpts, H = _H1, W = _W1)

		#L2-Normalize
		feats = F.normalize(feats, dim=-1)

		#Correct kpt scale
		mkpts = mkpts * torch.tensor([rw1,rh1], device=mkpts.device).view(1, 1, -1)

		valid = scores > 0
		return [  
				   {'keypoints': mkpts[b][valid[b]],
					'scores': scores[b][valid[b]],
					'descriptors': feats[b][valid[b]]} for b in range(B) 
			   ]

Can you take a look at what's going on? Thank you.
