| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243 |
- #include "../pch/pch.h"
- #include "SQLiteVecManager.h"
- #include <stdexcept>
- #include <iostream>
- #include <cmath>
- #include <algorithm>
- #include "../tool/debuglog.h"
- SQLiteVecManager::SQLiteVecManager(const std::string & databaseName) : dbName(databaseName), db(nullptr)
- {
- int rc = SQLITE_OK;
- sqlite3_stmt* stmt;
- rc = sqlite3_open(dbName.c_str(), &db);
- assert(rc == SQLITE_OK);
- if (rc != SQLITE_OK)
- {
- std::string err = "Can't open database: " + std::string(sqlite3_errmsg(db));
- throw std::runtime_error("Can't open database: " + std::string(sqlite3_errmsg(db)));
- }
- // 初始化 sqlite-vec 扩展
- char* errMsg = 0;
- rc = sqlite3_vec_init(db, &errMsg, 0);
- assert(rc == SQLITE_OK);
- if (rc != SQLITE_OK)
- {
- std::string err = "Can't init vec: " + std::string(sqlite3_errmsg(db));
- throw std::runtime_error("Can't init vec: " + std::string(sqlite3_errmsg(db)));
- }
- rc = sqlite3_prepare_v2(db, "SELECT sqlite_version(), vec_version()", -1, &stmt, NULL);
- assert(rc == SQLITE_OK);
- rc = sqlite3_step(stmt);
- DEBUG_HELPER::debug_printf("sqlite_version=%s, vec_version=%s\n", sqlite3_column_text(stmt, 0), sqlite3_column_text(stmt, 1));
- sqlite3_finalize(stmt);
- }
- SQLiteVecManager::~SQLiteVecManager()
- {
- if (db)
- {
- sqlite3_close(db);
- }
- }
- bool SQLiteVecManager::initializeDatabase(int vectorDimension)
- {
- int rc;
- char * errMsg = 0;
- std::cout << "使用sqlite-vec扩展进行向量存储和搜索" << std::endl;
- // 创建vec0虚拟表
- std::string sql = R"(CREATE VIRTUAL TABLE IF NOT EXISTS image_features USING vec0(
- id INTEGER PRIMARY KEY AUTOINCREMENT,
- image_path TEXT UNIQUE NOT NULL,
- feature_vector FLOAT[)" + std::to_string(vectorDimension) + "] distance_metric=cosine)";
- rc = sqlite3_exec(db, sql.c_str(), 0, 0, &errMsg);
- if (rc != SQLITE_OK)
- {
- std::string err = std::string("Failed to create vec0 table: ") + errMsg;
- sqlite3_free(errMsg);
- return false;
- }
- return true;
- }
- bool SQLiteVecManager::addFeatureVector(const std::vector<float> & features, const std::string & imagePath)
- {
- const char * sql = "INSERT INTO image_features(id, image_path, feature_vector) VALUES (?, ?, ?);";
- sqlite3_stmt * stmt;
- int rc = sqlite3_prepare_v2(db, sql, -1, &stmt, NULL);
- if (rc != SQLITE_OK)
- {
- std::string err = sqlite3_errmsg(db);
- std::cerr << "Failed to insert feature vector: " << err << std::endl;
- return false;
- }
- sqlite3_bind_int(stmt, 1, static_cast<int>(getFeatureCount() + 1));
- sqlite3_bind_text(stmt, 2, imagePath.c_str(), -1, SQLITE_STATIC);
- //std::string blobData = vectorToBlob(features);
- sqlite3_bind_blob(stmt, 3, features.data(), features.size() * sizeof(float), SQLITE_STATIC);
- rc = sqlite3_step(stmt);
- if (rc != SQLITE_DONE) {
- std::string err = sqlite3_errmsg(db);
- std::cerr << "Failed to insert feature vector: " << err << std::endl;
- sqlite3_finalize(stmt);
- return false;
- }
- sqlite3_finalize(stmt);
- return rc == SQLITE_DONE;
- }
- std::vector<std::pair<std::string, float>> SQLiteVecManager::searchSimilarVectors(const std::vector<float> & queryVector, int k)
- {
- std::vector<std::pair<std::string, float>> results;
- std::cout << "使用sqlite-vec扩展进行向量搜索" << std::endl;
- // 使用sqlite-vec的向量搜索功能
- std::string blobData = vectorToBlob(queryVector);
- std::string sql =
- "SELECT image_path, distance "
- "FROM image_features "
- "WHERE feature_vector MATCH ?1 "
- "ORDER BY distance "
- "LIMIT " + std::to_string(k) + ";";
- sqlite3_stmt * stmt;
- int rc = sqlite3_prepare_v2(db, sql.c_str(), -1, &stmt, NULL);
- if (rc != SQLITE_OK)
- {
- std::string err = sqlite3_errmsg(db);
- std::cerr << "Failed to insert feature vector: " << err << std::endl;
- return results;
- }
- sqlite3_bind_blob(stmt, 1, queryVector.data(), queryVector.size() * sizeof(float), SQLITE_STATIC);
- while ((rc = sqlite3_step(stmt)) == SQLITE_ROW)
- {
- const char * imagePath = reinterpret_cast<const char *>(sqlite3_column_text(stmt, 0));
- float distance = static_cast<float>(sqlite3_column_double(stmt, 1));
- float similarity = distanceToSimilarity(distance);
- results.emplace_back(std::string(imagePath), similarity);
- }
- sqlite3_finalize(stmt);
- return results;
- }
- void SQLiteVecManager::saveDatabase()
- {
- sqlite3_exec(db, "PRAGMA optimize;", 0, 0, 0);
- }
- bool SQLiteVecManager::loadDatabase()
- {
- const char * sql = "SELECT name FROM sqlite_master WHERE type='table' AND name='image_features';";
- sqlite3_stmt * stmt;
- int rc = sqlite3_prepare_v2(db, sql, -1, &stmt, NULL);
- if (rc == SQLITE_OK && sqlite3_step(stmt) == SQLITE_ROW)
- {
- sqlite3_finalize(stmt);
- return true;
- }
- sqlite3_finalize(stmt);
- return false;
- }
- int SQLiteVecManager::getFeatureCount() const
- {
- const char * sql = "SELECT COUNT(*) FROM image_features;";
- sqlite3_stmt * stmt;
- int rc = sqlite3_prepare_v2(db, sql, -1, &stmt, NULL);
- if (rc == SQLITE_OK && sqlite3_step(stmt) == SQLITE_ROW)
- {
- int count = sqlite3_column_int(stmt, 0);
- sqlite3_finalize(stmt);
- return count;
- }
- sqlite3_finalize(stmt);
- return 0;
- }
- bool SQLiteVecManager::isEmpty() const
- {
- return getFeatureCount() == 0;
- }
- std::string SQLiteVecManager::vectorToBlob(const std::vector<float> & vec)
- {
- std::string vecStr;
- for (std::size_t i = 0; i < vec.size(); i++) {
- vecStr += std::to_string(vec[i]);
- if (i != vec.size() - 1) {
- vecStr += ",";
- }
- }
- return vecStr;
- }
- std::vector<float> SQLiteVecManager::blobToVector(const std::string & blob)
- {
- const float * data = reinterpret_cast<const float *>(blob.data());
- size_t count = blob.size() / sizeof(float);
- return std::vector<float>(data, data + count);
- }
- float SQLiteVecManager::calculateCosineSimilarity(const std::vector<float> & vec1, const std::vector<float> & vec2)
- {
- if (vec1.size() != vec2.size() || vec1.empty())
- {
- return 0.0f;
- }
- float dotProduct = 0.0f;
- float norm1 = 0.0f;
- float norm2 = 0.0f;
- for (size_t i = 0; i < vec1.size(); ++i)
- {
- dotProduct += vec1[i] * vec2[i];
- norm1 += vec1[i] * vec1[i];
- norm2 += vec2[i] * vec2[i];
- }
- if (norm1 == 0.0f || norm2 == 0.0f)
- {
- return 0.0f;
- }
- return dotProduct / (std::sqrt(norm1) * std::sqrt(norm2));
- }
- float SQLiteVecManager::distanceToSimilarity(float distance)
- {
- return 1.0f - distance;
- }
|