
verlab / accelerated_features


Implementation of XFeat (CVPR 2024). Do you need robust and fast local feature extraction? You are in the right place!

Home Page: https://www.verlab.dcc.ufmg.br/descriptors/xfeat_cvpr24

License: Apache License 2.0

Languages: Jupyter Notebook 97.96%, Python 2.04%
Topics: descriptors, image-matching, image-registration, keypoints, lightweight, local-features, real-time

accelerated_features's People

Contributors

felipecadar, guipotje, iamshubhamgupto, qubvel


accelerated_features's Issues

Image registration based on XFeat (C++)

Thanks to @acai66 for the reference code; it has helped a lot. Their code has been modified a bit.

For the following steps I had help from Stack Overflow.

Step 1: Install onnxruntime

mkdir /tmp/onnxInstall
cd /tmp/onnxInstall
wget -O onnx_archive.nupkg https://www.nuget.org/api/v2/package/Microsoft.ML.OnnxRuntime/1.7.0
unzip onnx_archive.nupkg
cp runtimes/linux-x64/native/libonnxruntime.so ~/.local/lib/
cp -r build/native/include/ ~/.local/include/onnxruntime/

Step 2: CMake

Now, if you want find_package(onnxruntime) to work from your CMake project, place the self-created onnxruntime CMake files below in ~/.local/share/cmake/onnxruntime.

Create ~/.local/share/cmake/onnxruntime/onnxruntimeVersion.cmake:

set(PACKAGE_VERSION "1.7.0")
if("${PACKAGE_VERSION}" VERSION_LESS "${PACKAGE_FIND_VERSION}")
  set(PACKAGE_VERSION_COMPATIBLE FALSE)
else()
  set(PACKAGE_VERSION_COMPATIBLE TRUE)
  if("${PACKAGE_VERSION}" VERSION_EQUAL "${PACKAGE_FIND_VERSION}")
    set(PACKAGE_VERSION_EXACT TRUE)
  endif()
endif()

Step 3: Create ~/.local/share/cmake/onnxruntime/onnxruntimeConfig.cmake:

include(FindPackageHandleStandardArgs)

# Assume we are in <prefix>/share/cmake/onnxruntime/onnxruntimeConfig.cmake
get_filename_component(CMAKE_CURRENT_LIST_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH)
get_filename_component(onnxruntime_INSTALL_PREFIX "${CMAKE_CURRENT_LIST_DIR}/../../../" ABSOLUTE)

set(onnxruntime_INCLUDE_DIRS ${onnxruntime_INSTALL_PREFIX}/include)
set(onnxruntime_LIBRARIES onnxruntime)
set(onnxruntime_CXX_FLAGS "") # no flags needed

find_library(onnxruntime_LIBRARY onnxruntime
PATHS "${onnxruntime_INSTALL_PREFIX}/lib"
)

add_library(onnxruntime SHARED IMPORTED)
set_property(TARGET onnxruntime PROPERTY IMPORTED_LOCATION "${onnxruntime_LIBRARY}")
set_property(TARGET onnxruntime PROPERTY INTERFACE_INCLUDE_DIRECTORIES "${onnxruntime_INCLUDE_DIRS}")
set_property(TARGET onnxruntime PROPERTY INTERFACE_COMPILE_OPTIONS "${onnxruntime_CXX_FLAGS}")

find_package_handle_standard_args(onnxruntime DEFAULT_MSG onnxruntime_LIBRARY onnxruntime_INCLUDE_DIRS)

Step 4: Project CMakeLists.txt

cmake_minimum_required(VERSION 3.0)
project(YourProjectName)
#set(OpenCV_DIR "path/to/opencv/build")
include_directories("~/.local/include/onnxruntime/")
find_package(OpenCV REQUIRED)
find_package(onnxruntime)
set(CMAKE_CXX_STANDARD 14)
set(CMAKE_CXX_STANDARD_REQUIRED True)

include_directories(${onnxruntime_INCLUDE_DIRS})
include_directories(${OpenCV_INCLUDE_DIRS})

add_executable(${PROJECT_NAME} demo.cpp)
target_link_libraries(${PROJECT_NAME} ${OpenCV_LIBS})
target_link_libraries(${PROJECT_NAME} "~/.local/lib/libonnxruntime.so")

Step 5: Create a symlink to the onnxruntime shared library

ln -s ~/.local/lib/libonnxruntime.so /usr/local/lib/libonnxruntime.so.1.7.0

The modified C++ code:

#include <chrono>
#include <iostream>
#include "omp.h"

#include <opencv2/opencv.hpp>
#include <onnxruntime_cxx_api.h>


// for onnx model path
const ORTCHAR_T* stringToOrtchar_t(std::string const& s)
{
#ifdef _WIN32
	const char* CStr = s.c_str();
	size_t len = strlen(CStr) + 1;
	size_t converted = 0;
	wchar_t* WStr;
	WStr = (wchar_t*)malloc(len * sizeof(wchar_t));
	mbstowcs_s(&converted, WStr, len, CStr, _TRUNCATE);

	return WStr;
#else
	return s.c_str();
#endif // _WIN32
}

bool initOrtSession(const Ort::Env& env, Ort::Session& session, std::string& modelPath, const int& gpuId = 0)
{
	const ORTCHAR_T* ortModelPath = stringToOrtchar_t(modelPath);

	bool sessionIsAvailable = false;
	/* 
	if (sessionIsAvailable == false)
	{
		try
		{
			Ort::SessionOptions session_options;
			session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL);

			// try Tensorrt 
			OrtTensorRTProviderOptions trtOptions{};
			trtOptions.device_id = gpuId;
			trtOptions.trt_fp16_enable = 1;
			trtOptions.trt_engine_cache_enable = 1;
			trtOptions.trt_engine_cache_path = "./trt_engine_cache";


			trtOptions.trt_max_workspace_size = (size_t)4 * 1024 * 1024 * 1024;

			session_options.AppendExecutionProvider_TensorRT(trtOptions);

			session = Ort::Session(env, ortModelPath, session_options);
			
			sessionIsAvailable = true;
			std::cout << "Using accelerator: Tensorrt" << std::endl;
		}
		catch (Ort::Exception e)
		{
			std::cout << "Exception code: " << e.GetOrtErrorCode() << ", exception: " << e.what() << std::endl;
			std::cout << "Failed to init Tensorrt accelerator, Trying another accelerator..." << std::endl;
			sessionIsAvailable = false;
		}
		catch (...)
		{
			std::cout << "Failed to init Tensorrt accelerator, Trying another accelerator..." << std::endl;
			sessionIsAvailable = false;
		}
	}
	*/

	if (sessionIsAvailable == false)
	{
		try
		{
			Ort::SessionOptions session_options;
			session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL);

			OrtCUDAProviderOptions cuda0ptions;
			cuda0ptions.device_id = gpuId;
			cuda0ptions.cuda_mem_limit = (size_t)4 * 1024 * 1024 * 1024;  // 4 GB; note that 4 << 30 would overflow a 32-bit int

			session_options.AppendExecutionProvider_CUDA(cuda0ptions);

			session = Ort::Session(const_cast<Ort::Env&>(env), ortModelPath, session_options);
			
			sessionIsAvailable = true;
			std::cout << "Using accelerator: CUDA" << std::endl;
		}
		catch (const Ort::Exception& e)
		{
			std::cout << "Exception code: " << e.GetOrtErrorCode() << ", exception: " << e.what() << std::endl;
			std::cout << "Failed to init CUDA accelerator, Trying another accelerator..." << std::endl;
			sessionIsAvailable = false;
		}
		catch (...)
		{
			std::cout << "Failed to init CUDA accelerator, Trying another accelerator..." << std::endl;
			sessionIsAvailable = false;
		}
	}
	if (sessionIsAvailable == false)
	{
		try
		{
			Ort::SessionOptions session_options;
			session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL);

			session = Ort::Session(const_cast<Ort::Env&>(env), ortModelPath, session_options);
			
			sessionIsAvailable = true;
			std::cout << "Using accelerator: CPU" << std::endl;
		}
		catch (const Ort::Exception& e)
		{
			std::cout << "Exception code: " << e.GetOrtErrorCode() << ", exception: " << e.what() << std::endl;
			std::cout << "Failed to init CPU accelerator, Trying another accelerator..." << std::endl;
			sessionIsAvailable = false;
		}
		catch (...)
		{
			std::cout << "Failed to init CPU accelerator." << std::endl;
			sessionIsAvailable = false;
		}
	}

	if (sessionIsAvailable == true)
	{
		Ort::AllocatorWithDefaultOptions allocator;
		// Get input layers count
		size_t num_input_nodes = session.GetInputCount();

		// Get input layer type, shape, name
		for (int i = 0; i < num_input_nodes; i++)
		{
			
			// Name
			std::string input_name = session.GetInputName(i, allocator); //std::string(session.GetInputName(i, allocator).get());

			std::cout << "Input " << i << ": " << input_name << ", shape: (";

			// Type
			Ort::TypeInfo type_info = session.GetInputTypeInfo(i);
			auto tensor_info = type_info.GetTensorTypeAndShapeInfo();

			ONNXTensorElementDataType type = tensor_info.GetElementType();

			// Shape
			std::vector<int64_t> input_node_dims = tensor_info.GetShape();

			for (int j = 0; j < input_node_dims.size(); j++) {
				std::cout << input_node_dims[j];
				if (j == input_node_dims.size() - 1)
				{
					std::cout << ")" << std::endl; 
				}
				else
				{
					std::cout << ", ";
				}
			}
		}

		// Get output layers count
		size_t num_output_nodes = session.GetOutputCount();

		// Get output layer type, shape, name
		for (int i = 0; i < num_output_nodes; i++) {
			// Name
			std::string output_name =  session.GetOutputName(i, allocator);
			std::cout << "Output " << i << ": " << output_name << ", shape: (";

			// type
			Ort::TypeInfo type_info = session.GetOutputTypeInfo(i);
			auto tensor_info = type_info.GetTensorTypeAndShapeInfo();

			ONNXTensorElementDataType type = tensor_info.GetElementType();

			// shape
			std::vector<int64_t> output_node_dims = tensor_info.GetShape();
			for (int j = 0; j < output_node_dims.size(); j++) {
				std::cout << output_node_dims[j];
				if (j == output_node_dims.size() - 1)
				{
					std::cout << ")" << std::endl; 
				}
				else
				{
					std::cout << ", ";
				}
			}
		}
		
	}
	else
	{
		std::cout << modelPath << " is not a valid model." << std::endl;
	}

	return sessionIsAvailable;
}


class XFeat
{
public:
	XFeat(std::string &xfeatModelPath, std::string& matchingModelPath);
	int detectAndCompute(const cv::Mat &image, cv::Mat &mkpts, cv::Mat& feats, cv::Mat& sc);
	int matchStar(const cv::Mat& mkpts0, const cv::Mat& feats0, const cv::Mat& sc0, const cv::Mat& mkpts1, const cv::Mat& feats1, cv::Mat& matches, cv::Mat& batch_indexes);

	~XFeat();

	// gpu id
	int gpuId_ = 0;

	// onnxruntime
	Ort::Env env_{ nullptr };
	Ort::Session xfeatSession_{ nullptr };
	Ort::Session matchingSession_{ nullptr };
	Ort::AllocatorWithDefaultOptions allocator;

	//
	std::vector<const char*> xfeatInputNames = { "images" };
	std::vector<const char*> xfeatOutputNames = { "mkpts", "feats", "sc" };
	std::vector<const char*> matchingInputNames = { "mkpts0", "feats0", "sc0", "mkpts1", "feats1"};
	std::vector<const char*> matchingOutputNames = { "matches", "batch_indexes" };

	bool initFinishedFlag_ = false;
};

XFeat::XFeat(std::string& xfeatModelPath, std::string& matchingModelPath)
{
	const ORTCHAR_T* ortXfeatModelPath = stringToOrtchar_t(xfeatModelPath);
	const ORTCHAR_T* ortMatchingModelPath = stringToOrtchar_t(matchingModelPath);

	env_ = Ort::Env{ OrtLoggingLevel::ORT_LOGGING_LEVEL_FATAL, "xfeat_demo" };  //  ORT_LOGGING_LEVEL_VERBOSE, ORT_LOGGING_LEVEL_FATAL

	std::vector<std::string> availableProviders = Ort::GetAvailableProviders();
	std::cout << "All available accelerators:" << std::endl;
	for (int i = 0; i < availableProviders.size(); i++)
	{
		std::cout << "  " << i + 1 << ". " << availableProviders[i] << std::endl;
	}
	// init sessions
	initOrtSession(env_, xfeatSession_, xfeatModelPath, gpuId_);
	initOrtSession(env_, matchingSession_, matchingModelPath, gpuId_);
}

XFeat::~XFeat()
{
	env_.release();
	xfeatSession_.release();
	matchingSession_.release();
}

int XFeat::detectAndCompute(const cv::Mat& image, cv::Mat& mkpts, cv::Mat& feats, cv::Mat& sc)
{
	// Pre process
	cv::Mat preProcessedImage = cv::Mat::zeros(image.rows, image.cols, CV_32FC3);
	int stride = preProcessedImage.rows * preProcessedImage.cols;
#pragma omp parallel for 
	for (int i = 0; i < stride; i++) // HWC -> CHW, BGR -> RGB
	{
		*((float*)preProcessedImage.data + i) = (float)*(image.data + i * 3 + 2);
		*((float*)preProcessedImage.data + i + stride) = (float)*(image.data + i * 3 + 1);
		*((float*)preProcessedImage.data + i + stride * 2) = (float)*(image.data + i * 3);
	}

	// Create input tensor
	int64_t input_size = preProcessedImage.rows * preProcessedImage.cols * 3;
	std::vector<int64_t> input_node_dims = { 1, 3, preProcessedImage.rows , preProcessedImage.cols };
	auto memory_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
	Ort::Value input_tensor = Ort::Value::CreateTensor<float>(memory_info, (float*)(preProcessedImage.data), input_size, input_node_dims.data(), input_node_dims.size());
	assert(input_tensor.IsTensor());


	// Run session
	auto output_tensors =
		xfeatSession_.Run(Ort::RunOptions{ nullptr }, xfeatInputNames.data(),
			&input_tensor, xfeatInputNames.size(), xfeatOutputNames.data(), xfeatOutputNames.size());
	assert(output_tensors.size() == xfeatOutputNames.size() && output_tensors.front().IsTensor());

	// Get outputs
	auto mkptsShape = output_tensors[0].GetTensorTypeAndShapeInfo().GetShape();
	int dim1 = static_cast<int>(mkptsShape[0]); // 1
	int dim2 = static_cast<int>(mkptsShape[1]); // 4800
	int dim3 = static_cast<int>(mkptsShape[2]); // 2
	float* mkptsDataPtr = output_tensors[0].GetTensorMutableData<float>();
	// To cv::Mat
	mkpts = cv::Mat(dim1, dim2, CV_32FC(dim3), mkptsDataPtr).clone();

	auto featsShape = output_tensors[1].GetTensorTypeAndShapeInfo().GetShape();
	dim1 = static_cast<int>(featsShape[0]); // 1
	dim2 = static_cast<int>(featsShape[1]); // 4800
	dim3 = static_cast<int>(featsShape[2]); // 64
	float* featsDataPtr = output_tensors[1].GetTensorMutableData<float>();
	feats = cv::Mat(dim1, dim2, CV_32FC(dim3), featsDataPtr).clone();

	auto scShape = output_tensors[2].GetTensorTypeAndShapeInfo().GetShape();
	dim1 = static_cast<int>(scShape[0]); // 1
	dim2 = static_cast<int>(scShape[1]); // 4800
	float* scDataPtr = output_tensors[2].GetTensorMutableData<float>();
	sc = cv::Mat(dim1, dim2, CV_32F, scDataPtr).clone();

	return 0;
}

int XFeat::matchStar(const cv::Mat& mkpts0, const cv::Mat& feats0, const cv::Mat& sc0, const cv::Mat& mkpts1, const cv::Mat& feats1, cv::Mat& matches, cv::Mat& batch_indexes)
{
    auto memory_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);

    int64_t mkpts0_size = mkpts0.total() * mkpts0.elemSize();
    std::vector<int64_t> mkpts0_dims = { mkpts0.rows, mkpts0.cols, mkpts0.channels() };
    Ort::Value mkpts0_tensor = Ort::Value::CreateTensor<float>(memory_info, (float*)(mkpts0.data), mkpts0_size, mkpts0_dims.data(), mkpts0_dims.size());

    int64_t feats0_size = feats0.total() * feats0.elemSize();
    std::vector<int64_t> feats0_dims = { feats0.rows, feats0.cols, feats0.channels() };
    Ort::Value feats0_tensor = Ort::Value::CreateTensor<float>(memory_info, (float*)(feats0.data), feats0_size, feats0_dims.data(), feats0_dims.size());

    int64_t sc0_size = sc0.total() * sc0.elemSize();
    std::vector<int64_t> sc0_dims = { sc0.rows, sc0.cols };
    Ort::Value sc0_tensor = Ort::Value::CreateTensor<float>(memory_info, (float*)(sc0.data), sc0_size, sc0_dims.data(), sc0_dims.size());

    int64_t mkpts1_size = mkpts1.total() * mkpts1.elemSize();
    std::vector<int64_t> mkpts1_dims = { mkpts1.rows, mkpts1.cols, mkpts1.channels() };
    Ort::Value mkpts1_tensor = Ort::Value::CreateTensor<float>(memory_info, (float*)(mkpts1.data), mkpts1_size, mkpts1_dims.data(), mkpts1_dims.size());

    int64_t feats1_size = feats1.total() * feats1.elemSize();
    std::vector<int64_t> feats1_dims = { feats1.rows, feats1.cols, feats1.channels() };
    Ort::Value feats1_tensor = Ort::Value::CreateTensor<float>(memory_info, (float*)(feats1.data), feats1_size, feats1_dims.data(), feats1_dims.size());

    // Create input tensors
    std::vector<Ort::Value> input_tensors;
    input_tensors.push_back(std::move(mkpts0_tensor));
    input_tensors.push_back(std::move(feats0_tensor));
    input_tensors.push_back(std::move(sc0_tensor));
    input_tensors.push_back(std::move(mkpts1_tensor));
    input_tensors.push_back(std::move(feats1_tensor));

    // Run session
    auto output_tensors =
        matchingSession_.Run(Ort::RunOptions{ nullptr }, matchingInputNames.data(),
            input_tensors.data(), input_tensors.size(), matchingOutputNames.data(), matchingOutputNames.size());
    
    // Check output tensors
    if (output_tensors.size() != matchingOutputNames.size() || !output_tensors.front().IsTensor()) {
        std::cerr << "Error: Output tensor size mismatch or output is not a tensor." << std::endl;
        return -1;
    }

    // Get outputs
    auto matchesShape = output_tensors[0].GetTensorTypeAndShapeInfo().GetShape();
    int dim1 = static_cast<int>(matchesShape[0]); // num
    int dim2 = static_cast<int>(matchesShape[1]); // 4
    // To cv::Mat
    float* matchesDataPtr = output_tensors[0].GetTensorMutableData<float>();
    matches = cv::Mat(dim1, dim2, CV_32F, matchesDataPtr).clone();

    auto batch_indexesShape = output_tensors[1].GetTensorTypeAndShapeInfo().GetShape();
    dim1 = static_cast<int>(batch_indexesShape[0]); // num

    float* batch_indexesDataPtr = output_tensors[1].GetTensorMutableData<float>();
    batch_indexes = cv::Mat(dim1, 1, CV_32F, batch_indexesDataPtr).clone();

    return 0;
}

cv::Mat warpCornersAndDrawMatches(const std::vector<cv::Point2f>& refPoints, const std::vector<cv::Point2f>& dstPoints,
	const cv::Mat& img1, const cv::Mat& img2)
{
	// Step 1: Calculate the Homography matrix and mask
	cv::Mat mask;
	cv::Mat H = cv::findHomography(refPoints, dstPoints, cv::RANSAC, 3.5, mask, 1000, 0.999);
	mask = mask.reshape(1, mask.total());  // Flatten the mask

	// Step 2: Get corners of the first image (img1)
	std::vector<cv::Point2f> cornersImg1 = { cv::Point2f(0, 0), cv::Point2f(img1.cols - 1, 0),
											cv::Point2f(img1.cols - 1, img1.rows - 1), cv::Point2f(0, img1.rows - 1) };
	std::vector<cv::Point2f> warpedCorners(4);

	// Step 3: Warp corners to the second image (img2) space
	cv::perspectiveTransform(cornersImg1, warpedCorners, H);

	// Step 4: Draw the warped corners in image2
	cv::Mat img2WithCorners = img2.clone();
	for (size_t i = 0; i < warpedCorners.size(); i++) {
		cv::line(img2WithCorners, warpedCorners[i], warpedCorners[(i + 1) % 4], cv::Scalar(0, 255, 0), 4);
	}

	// Step 5: Prepare keypoints and matches for drawMatches function
	std::vector<cv::KeyPoint> keypoints1, keypoints2;
	std::vector<cv::DMatch> matches;
	for (size_t i = 0; i < refPoints.size(); i++) {
		if (mask.at<uchar>(i)) {  // Only consider inliers
			keypoints1.emplace_back(refPoints[i], 5);
			keypoints2.emplace_back(dstPoints[i], 5);
		}
	}
	for (size_t i = 0; i < keypoints1.size(); i++) {
		matches.emplace_back(i, i, 0);
	}

	// Draw inlier matches
	cv::Mat imgMatches;
	cv::drawMatches(img1, keypoints1, img2WithCorners, keypoints2, matches, imgMatches, cv::Scalar(0, 255, 0), cv::Scalar::all(-1), std::vector<char>(), cv::DrawMatchesFlags::NOT_DRAW_SINGLE_POINTS);

	return imgMatches;
}

// Helper function to draw keypoints
cv::Mat drawKeypoints(const cv::Mat& img, const cv::Mat& mkpts) {
    cv::Mat imgWithKeypoints = img.clone();
    for (int i = 0; i < mkpts.rows; ++i) {
        cv::Point2f pt(mkpts.at<float>(i, 0), mkpts.at<float>(i, 1));
        cv::circle(imgWithKeypoints, pt, 5, cv::Scalar(0, 0, 255), -1);
    }
    return imgWithKeypoints;
}


int main()
{
    std::string xfeatModelPath = "/home/rack_dl/image_registration/xfeat/xfeat_dualscale.onnx";
    std::string matchingModelPath = "/home/rack_dl/image_registration/xfeat/matching.onnx";
    cv::Mat image0 = cv::imread("/home/rack_dl/register/xfeat/6.jpg");
    cv::Mat image1 = cv::imread("/home/rack_dl/register/xfeat/5.jpg");
    cv::Mat mkpts0, feats0, sc0;
    cv::Mat mkpts1, feats1, sc1;
    cv::Mat matches, batch_indexes;

    // Init xfeat object
    XFeat xfeat(xfeatModelPath, matchingModelPath);

    // Extract features
    xfeat.detectAndCompute(image0, mkpts0, feats0, sc0);
    xfeat.detectAndCompute(image1, mkpts1, feats1, sc1);

    // Matching and refine
    xfeat.matchStar(mkpts0, feats0, sc0, mkpts1, feats1, matches, batch_indexes);

    // Print results
    std::cout << "matches: " << matches.rows << "x" << matches.cols << "x" << matches.channels() << std::endl;
    std::cout << "batch_indexes: " << batch_indexes.rows << "x" << batch_indexes.cols << "x" << batch_indexes.channels() << std::endl;

    // Get points
    std::vector<cv::Point2f> points0, points1;
    for (int i = 0; i < matches.rows; i++) {
        points0.push_back(cv::Point2f(*((float*)matches.data + i * 4), *((float*)matches.data + i * 4 + 1)));
        points1.push_back(cv::Point2f(*((float*)matches.data + i * 4 + 2), *((float*)matches.data + i * 4 + 3)));
    }
    
    cv::Mat homography, transformed_img;
    homography = cv::findHomography(points0, points1, cv::RANSAC);
                
    if (homography.empty())
    {
        std::cout << "Homography estimation failed (empty matrix)." << std::endl;
        return -1;
    }

    cv::warpPerspective(image0, transformed_img, homography, image1.size());
    

    // Visualization
    cv::Mat drawImage = warpCornersAndDrawMatches(points0, points1, image0, image1);
    

    // Display images
    //cv::imshow("Detected Keypoints Image0", drawKeypoints(image0, mkpts0));
    //cv::imshow("Detected Keypoints Image1", drawKeypoints(image1, mkpts1));
    //cv::imshow("Matches", drawImage);
    cv::imshow("Registered", transformed_img);
    cv::waitKey();

    return 0;
}

Change the model and image paths accordingly.
Download only the models from this link:
XFeat

Just take the models from the link above; the script provided here is a modified version.
Once again, thanks for the reference code.

Training on custom datasets

I encountered a problem while creating my own megadepth_indices. I consulted the LoFTR instructions and D2-Net, but still couldn't find a way to create them. Can you provide the code for creating them?

XFeat + GIM

Hi,

Thanks for your amazing work!

Have you noticed this GIM work: https://xuelunshen.com/gim/ ? It seems that using internet videos to train the feature & matching networks is very powerful.

Do you have any plan to retrain the xfeat using their approach?

Best regards.

some questions

Thank you for the good work. Some questions follow:
(1) I am training on my own data; the raw image size is 128*128, so I revised the model to get a larger feature map.
(2) In self._unfold2d(x, ws=8), the 8 is a fixed parameter, but keypoint_position_loss and coordinate_classification_loss still contain an 8. Is it the same parameter, and must both be modified at the same time?
(3) generateRandomTPS has grid (8,6); what does that mean, and does it relate to ws=8 in (2)?
(4) acc_f is always nan in my training; is that normal?
Loss: 7.6677 acc_c0 0.314 acc_c1 0.156 acc_f: nan loss_c: 5.476 loss_f: 8.002 loss_kp: 0.065 #matches_c: 64 loss_kp_pos: 16.468 acc_kp_pos: 0.030

Input is RGB or BGR?

Hi @guipotje, I have another question.
In the training code you use cv2.imread to read images. However, cv2.imread returns images in BGR order by default. Is this a bug?
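If the color order turns out to matter, a quick check (a minimal sketch using standard OpenCV; the file name is just a placeholder and this is not part of the original training code) is to convert to RGB right after reading and compare the two results:

import cv2

img_bgr = cv2.imread("example.jpg")                  # OpenCV default: BGR channel order
img_rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)   # explicit conversion to RGB

# If the network was trained on images read with cv2.imread and no conversion,
# feeding RGB at inference time would introduce a (usually small) train/test mismatch.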

Minor typo in Section 3.1 of the paper

In Eq. (1), the $H_i ∗ W_i$ terms emerge as the primary computational bottleneck [...]

is probably meant to use \cdot instead of *, i.e.

In Eq. (1), the $H_i \cdot W_i$ terms emerge as the primary computational bottleneck [...]

in Section 3.1 of the paper. Also, there's an

accross

later in the section that contains one superfluous "c".

How to backpropagate through this work?

I want to use mkpts_0, mkpts_1 = xfeat.match_xfeat(im1, im2, top_k = 4096) to optimize different targets based on the distance between matches.
However, even if I remove the inference-only parts, back-propagation doesn't seem to work well.
Could you please tell me what I should do?

Thank you for your tremendous work.

Best regards
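One possible workaround, sketched below under the assumption that the goal is to optimize a transform from the distances between matched points (this is not the authors' method): treat the matched coordinates returned by match_xfeat as fixed observations and put the learnable parameters in a differentiable transform that acts on them, so gradients flow through the transform rather than through the non-differentiable keypoint extraction.

import torch

# mkpts_0, mkpts_1: (N, 2) numpy arrays from xfeat.match_xfeat(im1, im2, top_k=4096)
p0 = torch.from_numpy(mkpts_0).float()   # treated as constants (no grad to the network)
p1 = torch.from_numpy(mkpts_1).float()

# Example target: a learnable 2D affine transform (illustrative choice).
A = torch.eye(2, requires_grad=True)
t = torch.zeros(2, requires_grad=True)
opt = torch.optim.Adam([A, t], lr=1e-2)

for _ in range(200):
    opt.zero_grad()
    pred = p0 @ A.T + t                            # warp the keypoints of image 1
    loss = torch.nn.functional.mse_loss(pred, p1)  # distance between matches
    loss.backward()                                # gradients reach A and t only
    opt.step()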

WIP combining xfeat with steerers

I have a WIP of combining xfeat with steerers. I'm posting here in case someone other than me is interested in looking at this.

I've trained two versions of XFeat using this fork, one with a fixed permutation steerer and one with a learned steerer (this seems to be slightly better). A quick colab demo for the fixed steerer is here. Weights are here. I haven't really evaluated well (when will your evaluation code be released approximately?), but on a quick test on HPatches, the new versions seem comparable to the original on upright images.

Anyway, happy to discuss ideas in this thread.

How does this method perform if training without Reliability Map?

Hello. Thank you for your nice work and contribution to the field.

I have some questions.

  1. How does this method perform if training without Reliability Map?
  2. Since the method detects keypoints with a simple network (only four 1×1 convolutions), did you validate any keypoint-detection metric, e.g. repeatability?

I look forward to your reply and thank you for your excellent work!

Set up multiple GPUs during training

Hello, I set multiple GPUs like this: --device_num '0,1,2,3,4,5,6,7,8', but still only GPU 0 was used; it seems the setting has no effect.

Requesting a constants.pkl file for pre trained models

XFeat has been very helpful to me. Recently, I have been porting it to C++ with libtorch. During the testing phase, when loading the pretrained model, the following error occurred:
"terminate called after throwing an instance of 'c10::Error'"
what(): PytorchStreamReader failed locating file constants.pkl: file not found

So I would like to request a constants.pkl file for the pretrained model. Looking forward to your reply.

using smaller resolution for training

Hi @guipotje
I want to use 256x256 images for training; the original image size of my dataset is 512x512. During training, will the images be automatically resized to 256x256? Could you tell me the correct way to set the parameters?
What I have done is to set training_res = (256, 256) and grid = (5, 5) in generateRandomTPS. But then the keypoint_pos loss is high (about 20), the reliability is always increasing, and Accuracy/kp_position is always decreasing. This didn't happen when I set training_res = (512, 512). Thank you in advance.

acc_f nan

Hello, I am using the description in the README to reproduce your code.

During this process, I encountered some issues. I don't know why the acc_f is showing up as nan. I made some changes to the code and I'm not sure if these changes are related to the acc_f being nan.

I made the following changes:

  1. On line 161 of 'augumentation.py', I changed im = np.rot90(im) to im = cv2.rotate(im, cv2.ROTATE_90_CLOCKWISE) to avoid errors during training.
  2. In utils.py, line 36, I changed data = np.array(h5py.File(f, 'r')['/depth']) to data = np.array(h5py.File(f, 'r')['/depth'], dtype=np.float32) to avoid errors during training.

Besides the changes mentioned, I did not modify anything else, but acc_f still shows nan. Could you explain why this might be happening?

Multi-modality?

Great work on this project, it looks very promising! What are your thoughts on using this architecture to find keypoints and descriptors across modalities? This paper attempts to match features across RGB and infrared images. Do you see any reasons why this might not work for your architecture?

Xfeat + LighterGlue (Google Colab)

Hello!

I am using the provided XFeat + LighterGlue Google Colab to test this new model. However, I am not sure Google Colab is loading the correct model. It seems that it is downloading xfeat.pt instead of xfeat-lighterglue.pt
Downloading: "https://github.com/verlab/accelerated_features/raw/main/weights/xfeat.pt" to /root/.cache/torch/hub/checkpoints/xfeat.pt

Is this ok?

Thanks!

UserWarning: torch.meshgrid

UserWarning: torch.meshgrid: in an upcoming release, it will be required to pass the indexing argument. (Triggered internally at ..\aten\src\ATen\native\TensorShape.cpp:2157.)
return _VF.meshgrid(tensors, **kwargs) # type: ignore[attr-defined]

../accelerated_features/third_party/alike_wrapper.py line 76
../accelerated_features/third_party/ALIKE/soft_detect.py line 88
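For reference, the warning goes away if the indexing argument is passed explicitly; indexing='ij' reproduces the legacy default, so the outputs are unchanged (a sketch of the one-line change, not an official patch to the third-party code):

import torch

ys, xs = torch.meshgrid(torch.arange(4), torch.arange(5), indexing='ij')
# identical result to the old torch.meshgrid(torch.arange(4), torch.arange(5)),
# but without the UserWarning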

Training-related issues

Could you explain how the model is trained on COCO since it doesn’t have labels? Is it by performing geometric transformations on the images and then ensuring that the feature points extracted by the model from the original and transformed images are the same?
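For what it's worth, the general self-supervised recipe described above can be sketched as follows: warp an unlabeled image with a random synthetic transform and use the known transform itself as the ground-truth correspondence. This is a minimal homography-only illustration of the idea; the repository's actual training pipeline uses its own augmentations (e.g. TPS warps), and the file name below is a placeholder.

import cv2
import numpy as np

img = cv2.imread("coco_example.jpg")           # any unlabeled image
h, w = img.shape[:2]

# Random homography from a small perturbation of the image corners.
src = np.float32([[0, 0], [w, 0], [w, h], [0, h]])
dst = (src + np.random.uniform(-0.15, 0.15, src.shape) * [w, h]).astype(np.float32)
H = cv2.getPerspectiveTransform(src, dst)
warped = cv2.warpPerspective(img, H, (w, h))

# For any pixel (x, y) in img, its ground-truth location in warped follows from H,
# so keypoints/descriptors from the two views can be supervised without labels.
pts = np.random.uniform(0, [w, h], (100, 2)).astype(np.float32)
pts_warped = cv2.perspectiveTransform(pts[None], H)[0]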

Eval-MegaDepth-1500

Hello, I evaluated the method on MegaDepth following the LoFTR evaluation, but the results are not good, and I can't figure out the problem for now. Looking forward to your assessment.

Does XFeat work for two similar images but one of them is upsidedown?

I just found that for two similar (or identical) images, if one of them is upside down, the matching points are horrible. It doesn't work like SIFT, which matches features between both images and can rotate them back when aligning the two.

Am I wrong here, or do you have any suggestions?

Otherwise, it is fast and accurate when the two images have the same orientation.

Thanks
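As a possible workaround (a rough sketch, not a feature of the repository): since the detector is trained on mostly upright images and is not rotation-invariant, one can try the four 90-degree rotations of the second image and keep the one with the most RANSAC inliers, then undo the chosen rotation on the resulting keypoints. This assumes the xfeat.match_xfeat API quoted elsewhere in this thread.

import cv2
import numpy as np

best = None
for k in range(4):                                      # 0, 90, 180, 270 degrees CCW
    im2_rot = np.ascontiguousarray(np.rot90(im2, k))
    mk0, mk1 = xfeat.match_xfeat(im1, im2_rot, top_k=4096)
    if len(mk0) < 4:
        continue
    H, mask = cv2.findHomography(mk0, mk1, cv2.RANSAC, 3.5)
    inliers = int(mask.sum()) if mask is not None else 0
    if best is None or inliers > best[0]:
        best = (inliers, k, mk0, mk1)

if best is not None:
    print("best rotation (x90 deg CCW):", best[1], "inliers:", best[0])
    # Note: best[3] is in the rotated image's coordinates and must be mapped back
    # to im2's frame before drawing or warping.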

revise self._unfold2d(x, ws=8)?

Hi, I trained on my own data (image size about 128*128) and changed the model as follows:

	self.block1 = nn.Sequential(
									BasicLayer( 1,  8, stride=1),
									BasicLayer( 8, 24, stride=1),
									BasicLayer(24, 64, stride=1),
								 )

	self.block2 = nn.Sequential(
									BasicLayer(64, 64, stride=2),
									BasicLayer(64, 64, stride=1),
									BasicLayer(64, 64, stride=1),
								 )

	self.block3 = nn.Sequential(
									BasicLayer( 64, 128, stride=2),
									BasicLayer(128, 128, stride=1),
									BasicLayer(128, 128, stride=1),
									BasicLayer(128,  64, 1, padding=0),
								 )

	self.block_fusion =  nn.Sequential(
									BasicLayer(64, 64, stride=1),
									BasicLayer(64, 64, stride=1),
									nn.Conv2d (64, 64, 1, padding=0)
								 )

	self.heatmap_head = nn.Sequential(
									BasicLayer(64, 64, 1, padding=0),
									BasicLayer(64, 64, 1, padding=0),
									nn.Conv2d (64, 1, 1),
									nn.Sigmoid()
								)

	self.keypoint_head = nn.Sequential(
									BasicLayer(4, 64, 1, padding=0),
									BasicLayer(64, 64, 1, padding=0),
									BasicLayer(64, 64, 1, padding=0),
									nn.Conv2d (64, 5, 1),
								)

and the forward pass was changed as follows:

	def forward(self, x):
		"""
			input:
				x -> torch.Tensor(B, C, H, W) grayscale or rgb images
			return:
				feats     -> torch.Tensor(B, 64, H/8, W/8) dense local features
				keypoints -> torch.Tensor(B, 65, H/8, W/8) keypoint logit map
				heatmap   -> torch.Tensor(B,  1, H/8, W/8) reliability map
		"""
		# don't backprop through normalization
		with torch.no_grad():
			x = x.mean(dim=1, keepdim=True)
			x = self.norm(x)

		# main backbone
		x1 = self.block1(x)
		x2 = self.block2(x1)
		x3 = self.block3(x2)
		x4 = F.interpolate(x3, (x2.shape[-2], x2.shape[-1]), mode='bilinear')
		feats = self.block_fusion(x4 + x2)

		# heads
		heatmap = self.heatmap_head(feats)                        # reliability map
		keypoints = self.keypoint_head(self._unfold2d(x, ws=2))   # keypoint map logits
		return feats, keypoints, heatmap

The _unfold2d ws was changed to 2. How should keypoint_head be revised, and how should losses.py be revised accordingly?
Thank you

Extract feature points manually

I have a task: I want to use the center-point coordinates of object-detection boxes as the feature-point input, so as to match them to the next frame and achieve tracking. How are the feature points of XFeat learned? Methods such as COTR can do this, but they are inefficient.
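A rough sketch of one way to get tracking with the public API (the frame and box variables are hypothetical inputs; this filters dense matches by the detection box rather than feeding the box center to the network): match consecutive frames with match_xfeat, keep only matches whose source keypoint lies inside the box, and move the box by the median displacement.

import numpy as np

# frame_t, frame_t1: consecutive frames; box = (x1, y1, x2, y2) from the detector
mk0, mk1 = xfeat.match_xfeat(frame_t, frame_t1, top_k=4096)

x1, y1, x2, y2 = box
inside = (mk0[:, 0] >= x1) & (mk0[:, 0] <= x2) & (mk0[:, 1] >= y1) & (mk0[:, 1] <= y2)

if inside.sum() >= 3:
    # Robust box motion estimate: median displacement of the matched keypoints inside the box.
    dx, dy = np.median(mk1[inside] - mk0[inside], axis=0)
    box_next = (x1 + dx, y1 + dy, x2 + dx, y2 + dy)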

Use both: keypoints + refinementMLP ?

I can see that match_xfeat and match_xfeat_star use different strategies for fine-grained feature matching:

match_xfeat

On one hand, match_xfeat uses the model's keypoints output (fine-grained heatmap) to first extract the mkpts and then uses those mkpts to bicubically interpolate the descriptors (aka feats) at the correct offset.

match_xfeat_star

On the other hand, match_xfeat_star totally ignores the model's keypoints output. match_xfeat_star gets the correct offsets by using the refinementMLP on the raw model feats (no feats interpolation).

My idea: Use both for better performance?

Method              Use keypoints and interpolate feats    Use refinementMLP to offset mkpts
match_xfeat         yes                                    no
match_xfeat_star    no                                     yes
Use both            yes                                    yes

My idea to (maybe) achieve a better accuracy is to use the refinementMLP on the bicubic-interpolated feats to further correct the offset.
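A conceptual sketch of the hybrid idea (all names and shapes below are assumptions for illustration; the real refinement module lives inside the match_xfeat_star pipeline and may expect different inputs): detect keypoints and bicubically interpolate descriptors as match_xfeat does, then feed the concatenated matched descriptor pairs to a refinement MLP and add its predicted sub-pixel offset to the target keypoints.

import torch

def match_xfeat_hybrid(desc0, desc1, mkpts0, mkpts1, refinement_mlp):
    """Hypothetical hybrid of match_xfeat and match_xfeat_star.

    desc0/desc1: (N, 64) descriptors already bicubically interpolated at the keypoints,
    mkpts0/mkpts1: (N, 2) matched keypoints, refinement_mlp: a module (assumed here to
    map the concatenated pair directly to a 2D offset) playing the role of the
    refinement MLP used by match_xfeat_star.
    """
    offsets = refinement_mlp(torch.cat([desc0, desc1], dim=-1))  # (N, 2), assumed output
    return mkpts0, mkpts1 + offsets                              # sub-pixel refined targets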

Usage on multimodal data

Nice work!

Did you already try some tests with multimodal data, such as depth images and thermal?

Training XFEAT

I am very grateful for your comprehensive and well-organized repository on GitHub.
When do you plan to release the code for training networks for custom applications?

XFeat + LightGlue

Hello everyone,

I'm training some LightGlue variations (finding a neat trade-off between model size and accuracy), and I will update the repo with the model and weights in the next few weeks!

You can follow this issue if you are interested.

Best,

Guilherme

training

Hello, I want to do some fine-tuning based on your trained model and my own dataset. Since there are no labels, I want to replace the coco20k dataset with my own dataset, or add some of my own data to it. Do you recommend "full training" or "training from the existing weights"? Do I need to modify the training hyperparameters and strategies? Looking forward to your reply.

The code performs poorly

I found that without LightGlue, the matching results are very poor (screenshot: 2024-08-20_21-30-47).

Besides, when I visualized the heatmap, the result was also very poor (screenshot: 2024-08-20_21-32-55).

The imshow code is:

@torch.inference_mode()
	def detectAndCompute(self, x, top_k = None, detection_threshold = None):
		"""
			Compute sparse keypoints & descriptors. Supports batched mode.

			input:
				x -> torch.Tensor(B, C, H, W): grayscale or rgb image
				top_k -> int: keep best k features
			return:
				List[Dict]: 
					'keypoints'    ->   torch.Tensor(N, 2): keypoints (x,y)
					'scores'       ->   torch.Tensor(N,): keypoint scores
					'descriptors'  ->   torch.Tensor(N, 64): local features
		"""
		if top_k is None: top_k = self.top_k
		if detection_threshold is None: detection_threshold = self.detection_threshold
		x, rh1, rw1 = self.preprocess_tensor(x)

		B, _, _H1, _W1 = x.shape
        
		M1, K1, H1 = self.net(x)
		mod = torch.jit.trace(self.net, x)
		torch.jit.save(mod,"feature.pt")
		M1 = F.normalize(M1, dim=1)

		#Convert logits to heatmap and extract kpts
		K1h = self.get_kpts_heatmap(K1)

		array1 = K1h.numpy()  # convert tensor to numpy
		maxValue = array1.max()
		array1 = array1 * 255  # scale values to [0, 255]
		mat = np.uint8(array1)  # float32 --> uint8
		print('mat_shape:', mat.shape)  # mat_shape: (3, 982, 814)
		mat = mat[0, :, :, :].transpose(1, 2, 0)  # mat_shape: (982, 814, 3)
		cv2.imshow("img", mat)

		array2 = H1.numpy()  # convert tensor to numpy
		maxValue = array2.max()
		print(maxValue)
		#array2 = array2 * 255 / maxValue  # scale values to [0, 255]
		array2 = array2 * 255  # scale values to [0, 255]
		mat = np.uint8(array2)  # float32 --> uint8
		print('mat_shape:', mat.shape)  # mat_shape: (3, 982, 814)
		mat = mat[0, :, :, :].transpose(1, 2, 0)  # mat_shape: (982, 814, 3)
		cv2.imshow("img2", mat)
		cv2.waitKey()

		mkpts = self.NMS(K1h, threshold=detection_threshold, kernel_size=5)

		#Compute reliability scores
		_nearest = InterpolateSparse2d('nearest')
		_bilinear = InterpolateSparse2d('bilinear')
		scores = (_nearest(K1h, mkpts, _H1, _W1) * _bilinear(H1, mkpts, _H1, _W1)).squeeze(-1)
		scores[torch.all(mkpts == 0, dim=-1)] = -1

		#Select top-k features
		idxs = torch.argsort(-scores)
		mkpts_x  = torch.gather(mkpts[...,0], -1, idxs)[:, :top_k]
		mkpts_y  = torch.gather(mkpts[...,1], -1, idxs)[:, :top_k]
		mkpts = torch.cat([mkpts_x[...,None], mkpts_y[...,None]], dim=-1)
		scores = torch.gather(scores, -1, idxs)[:, :top_k]

		#Interpolate descriptors at kpts positions
		feats = self.interpolator(M1, mkpts, H = _H1, W = _W1)

		#L2-Normalize
		feats = F.normalize(feats, dim=-1)

		#Correct kpt scale
		mkpts = mkpts * torch.tensor([rw1,rh1], device=mkpts.device).view(1, 1, -1)

		valid = scores > 0
		return [  
				   {'keypoints': mkpts[b][valid[b]],
					'scores': scores[b][valid[b]],
					'descriptors': feats[b][valid[b]]} for b in range(B) 
			   ]

Can you take a look at what's going on? Thank you.
