sqlite-vec.c 298 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477
74778477947804781478247834784478547864787478847894790479147924793479447954796479747984799480048014802480348044805480648074808480948104811481248134814481548164817481848194820482148224823482448254826482748284829483048314832483348344835483648374838483948404841484248434844484548464847484848494850485148524853485448554856485748584859486048614862486348644865486648674868486948704871487248734874487548764877487848794880488148824883488448854886488748884889489048914892489348944895489648974898489949004901490249034904490549064907490849094910491149124913491449154916491749184919492049214922492349244925492649274928492949304931493249334934493549364937493849394940494149424943494449454946494749484949495049514952495349544955495649574958495949604961496249634964496549664967496849694970497149724973497449754976497749784979498049814982498349844985498649874988498949904991499249934994499549964997499849995000500150025003500450055006500750085009501050115012501350145015501650175018501950205021502250235024502550265027502850295030503150325033503450355036503750385039504050415042504350445045504650475048504950505051505250535054505550565057505850595060506150625063506450655066506750685069507050715072507350745075507650775078507950805081508250835084508550865087508850895090509150925093509450955096509750985099510051015102510351045105510651075108510951105111511251135114511551165117511851195120512151225123512451255126512751285129513051315132513351345135513651375138513951405141514251435144514551465147514851495150515151525153515451555156515751585159516051615162516351645165516651675168516951705171517251735174517551765177517851795180518151825183518451855186518751885189519051915192519351945195519651975198519952005201520252035204520552065207520852095210521152125213521452155216521752185219522052215222522352245225522652275228522952305231523252335234523552365237523852395240524152425243524452455246524752485249525052515252525352545255525652575258525952605261526252635264526552665267526852695270527152725273527452755276527
75278527952805281528252835284528552865287528852895290529152925293529452955296529752985299530053015302530353045305530653075308530953105311531253135314531553165317531853195320532153225323532453255326532753285329533053315332533353345335533653375338533953405341534253435344534553465347534853495350535153525353535453555356535753585359536053615362536353645365536653675368536953705371537253735374537553765377537853795380538153825383538453855386538753885389539053915392539353945395539653975398539954005401540254035404540554065407540854095410541154125413541454155416541754185419542054215422542354245425542654275428542954305431543254335434543554365437543854395440544154425443544454455446544754485449545054515452545354545455545654575458545954605461546254635464546554665467546854695470547154725473547454755476547754785479548054815482548354845485548654875488548954905491549254935494549554965497549854995500550155025503550455055506550755085509551055115512551355145515551655175518551955205521552255235524552555265527552855295530553155325533553455355536553755385539554055415542554355445545554655475548554955505551555255535554555555565557555855595560556155625563556455655566556755685569557055715572557355745575557655775578557955805581558255835584558555865587558855895590559155925593559455955596559755985599560056015602560356045605560656075608560956105611561256135614561556165617561856195620562156225623562456255626562756285629563056315632563356345635563656375638563956405641564256435644564556465647564856495650565156525653565456555656565756585659566056615662566356645665566656675668566956705671567256735674567556765677567856795680568156825683568456855686568756885689569056915692569356945695569656975698569957005701570257035704570557065707570857095710571157125713571457155716571757185719572057215722572357245725572657275728572957305731573257335734573557365737573857395740574157425743574457455746574757485749575057515752575357545755575657575758575957605761576257635764576557665767576857695770577157725773577457755776577
75778577957805781578257835784578557865787578857895790579157925793579457955796579757985799580058015802580358045805580658075808580958105811581258135814581558165817581858195820582158225823582458255826582758285829583058315832583358345835583658375838583958405841584258435844584558465847584858495850585158525853585458555856585758585859586058615862586358645865586658675868586958705871587258735874587558765877587858795880588158825883588458855886588758885889589058915892589358945895589658975898589959005901590259035904590559065907590859095910591159125913591459155916591759185919592059215922592359245925592659275928592959305931593259335934593559365937593859395940594159425943594459455946594759485949595059515952595359545955595659575958595959605961596259635964596559665967596859695970597159725973597459755976597759785979598059815982598359845985598659875988598959905991599259935994599559965997599859996000600160026003600460056006600760086009601060116012601360146015601660176018601960206021602260236024602560266027602860296030603160326033603460356036603760386039604060416042604360446045604660476048604960506051605260536054605560566057605860596060606160626063606460656066606760686069607060716072607360746075607660776078607960806081608260836084608560866087608860896090609160926093609460956096609760986099610061016102610361046105610661076108610961106111611261136114611561166117611861196120612161226123612461256126612761286129613061316132613361346135613661376138613961406141614261436144614561466147614861496150615161526153615461556156615761586159616061616162616361646165616661676168616961706171617261736174617561766177617861796180618161826183618461856186618761886189619061916192619361946195619661976198619962006201620262036204620562066207620862096210621162126213621462156216621762186219622062216222622362246225622662276228622962306231623262336234623562366237623862396240624162426243624462456246624762486249625062516252625362546255625662576258625962606261626262636264626562666267626862696270627162726273627462756276627
76278627962806281628262836284628562866287628862896290629162926293629462956296629762986299630063016302630363046305630663076308630963106311631263136314631563166317631863196320632163226323632463256326632763286329633063316332633363346335633663376338633963406341634263436344634563466347634863496350635163526353635463556356635763586359636063616362636363646365636663676368636963706371637263736374637563766377637863796380638163826383638463856386638763886389639063916392639363946395639663976398639964006401640264036404640564066407640864096410641164126413641464156416641764186419642064216422642364246425642664276428642964306431643264336434643564366437643864396440644164426443644464456446644764486449645064516452645364546455645664576458645964606461646264636464646564666467646864696470647164726473647464756476647764786479648064816482648364846485648664876488648964906491649264936494649564966497649864996500650165026503650465056506650765086509651065116512651365146515651665176518651965206521652265236524652565266527652865296530653165326533653465356536653765386539654065416542654365446545654665476548654965506551655265536554655565566557655865596560656165626563656465656566656765686569657065716572657365746575657665776578657965806581658265836584658565866587658865896590659165926593659465956596659765986599660066016602660366046605660666076608660966106611661266136614661566166617661866196620662166226623662466256626662766286629663066316632663366346635663666376638663966406641664266436644664566466647664866496650665166526653665466556656665766586659666066616662666366646665666666676668666966706671667266736674667566766677667866796680668166826683668466856686668766886689669066916692669366946695669666976698669967006701670267036704670567066707670867096710671167126713671467156716671767186719672067216722672367246725672667276728672967306731673267336734673567366737673867396740674167426743674467456746674767486749675067516752675367546755675667576758675967606761676267636764676567666767676867696770677167726773677467756776677
76778677967806781678267836784678567866787678867896790679167926793679467956796679767986799680068016802680368046805680668076808680968106811681268136814681568166817681868196820682168226823682468256826682768286829683068316832683368346835683668376838683968406841684268436844684568466847684868496850685168526853685468556856685768586859686068616862686368646865686668676868686968706871687268736874687568766877687868796880688168826883688468856886688768886889689068916892689368946895689668976898689969006901690269036904690569066907690869096910691169126913691469156916691769186919692069216922692369246925692669276928692969306931693269336934693569366937693869396940694169426943694469456946694769486949695069516952695369546955695669576958695969606961696269636964696569666967696869696970697169726973697469756976697769786979698069816982698369846985698669876988698969906991699269936994699569966997699869997000700170027003700470057006700770087009701070117012701370147015701670177018701970207021702270237024702570267027702870297030703170327033703470357036703770387039704070417042704370447045704670477048704970507051705270537054705570567057705870597060706170627063706470657066706770687069707070717072707370747075707670777078707970807081708270837084708570867087708870897090709170927093709470957096709770987099710071017102710371047105710671077108710971107111711271137114711571167117711871197120712171227123712471257126712771287129713071317132713371347135713671377138713971407141714271437144714571467147714871497150715171527153715471557156715771587159716071617162716371647165716671677168716971707171717271737174717571767177717871797180718171827183718471857186718771887189719071917192719371947195719671977198719972007201720272037204720572067207720872097210721172127213721472157216721772187219722072217222722372247225722672277228722972307231723272337234723572367237723872397240724172427243724472457246724772487249725072517252725372547255725672577258725972607261726272637264726572667267726872697270727172727273727472757276727
77278727972807281728272837284728572867287728872897290729172927293729472957296729772987299730073017302730373047305730673077308730973107311731273137314731573167317731873197320732173227323732473257326732773287329733073317332733373347335733673377338733973407341734273437344734573467347734873497350735173527353735473557356735773587359736073617362736373647365736673677368736973707371737273737374737573767377737873797380738173827383738473857386738773887389739073917392739373947395739673977398739974007401740274037404740574067407740874097410741174127413741474157416741774187419742074217422742374247425742674277428742974307431743274337434743574367437743874397440744174427443744474457446744774487449745074517452745374547455745674577458745974607461746274637464746574667467746874697470747174727473747474757476747774787479748074817482748374847485748674877488748974907491749274937494749574967497749874997500750175027503750475057506750775087509751075117512751375147515751675177518751975207521752275237524752575267527752875297530753175327533753475357536753775387539754075417542754375447545754675477548754975507551755275537554755575567557755875597560756175627563756475657566756775687569757075717572757375747575757675777578757975807581758275837584758575867587758875897590759175927593759475957596759775987599760076017602760376047605760676077608760976107611761276137614761576167617761876197620762176227623762476257626762776287629763076317632763376347635763676377638763976407641764276437644764576467647764876497650765176527653765476557656765776587659766076617662766376647665766676677668766976707671767276737674767576767677767876797680768176827683768476857686768776887689769076917692769376947695769676977698769977007701770277037704770577067707770877097710771177127713771477157716771777187719772077217722772377247725772677277728772977307731773277337734773577367737773877397740774177427743774477457746774777487749775077517752775377547755775677577758775977607761776277637764776577667767776877697770777177727773777477757776777
77778777977807781778277837784778577867787778877897790779177927793779477957796779777987799780078017802780378047805780678077808780978107811781278137814781578167817781878197820782178227823782478257826782778287829783078317832783378347835783678377838783978407841784278437844784578467847784878497850785178527853785478557856785778587859786078617862786378647865786678677868786978707871787278737874787578767877787878797880788178827883788478857886788778887889789078917892789378947895789678977898789979007901790279037904790579067907790879097910791179127913791479157916791779187919792079217922792379247925792679277928792979307931793279337934793579367937793879397940794179427943794479457946794779487949795079517952795379547955795679577958795979607961796279637964796579667967796879697970797179727973797479757976797779787979798079817982798379847985798679877988798979907991799279937994799579967997799879998000800180028003800480058006800780088009801080118012801380148015801680178018801980208021802280238024802580268027802880298030803180328033803480358036803780388039804080418042804380448045804680478048804980508051805280538054805580568057805880598060806180628063806480658066806780688069807080718072807380748075807680778078807980808081808280838084808580868087808880898090809180928093809480958096809780988099810081018102810381048105810681078108810981108111811281138114811581168117811881198120812181228123812481258126812781288129813081318132813381348135813681378138813981408141814281438144814581468147814881498150815181528153815481558156815781588159816081618162816381648165816681678168816981708171817281738174817581768177817881798180818181828183818481858186818781888189819081918192819381948195819681978198819982008201820282038204820582068207820882098210821182128213821482158216821782188219822082218222822382248225822682278228822982308231823282338234823582368237823882398240824182428243824482458246824782488249825082518252825382548255825682578258825982608261826282638264826582668267826882698270827182728273827482758276827
78278827982808281828282838284828582868287828882898290829182928293829482958296829782988299830083018302830383048305830683078308830983108311831283138314831583168317831883198320832183228323832483258326832783288329833083318332833383348335833683378338833983408341834283438344834583468347834883498350835183528353835483558356835783588359836083618362836383648365836683678368836983708371837283738374837583768377837883798380838183828383838483858386838783888389839083918392839383948395839683978398839984008401840284038404840584068407840884098410841184128413841484158416841784188419842084218422842384248425842684278428842984308431843284338434843584368437843884398440844184428443844484458446844784488449845084518452845384548455845684578458845984608461846284638464846584668467846884698470847184728473847484758476847784788479848084818482848384848485848684878488848984908491849284938494849584968497849884998500850185028503850485058506850785088509851085118512851385148515851685178518851985208521852285238524852585268527852885298530853185328533853485358536853785388539854085418542854385448545854685478548854985508551855285538554855585568557855885598560856185628563856485658566856785688569857085718572857385748575857685778578857985808581858285838584858585868587858885898590859185928593859485958596859785988599860086018602860386048605860686078608860986108611861286138614861586168617861886198620862186228623862486258626862786288629863086318632863386348635863686378638863986408641864286438644864586468647864886498650865186528653865486558656865786588659866086618662866386648665866686678668866986708671867286738674867586768677867886798680868186828683868486858686868786888689869086918692869386948695869686978698869987008701870287038704870587068707870887098710871187128713871487158716871787188719872087218722872387248725872687278728872987308731873287338734873587368737873887398740874187428743874487458746874787488749875087518752875387548755875687578758875987608761876287638764876587668767876887698770877187728773877487758776877
78778877987808781878287838784878587868787878887898790879187928793879487958796879787988799880088018802880388048805880688078808880988108811881288138814881588168817881888198820882188228823882488258826882788288829883088318832883388348835883688378838883988408841884288438844884588468847884888498850885188528853885488558856885788588859886088618862886388648865886688678868886988708871887288738874887588768877887888798880888188828883888488858886888788888889889088918892889388948895889688978898889989008901890289038904890589068907890889098910891189128913891489158916891789188919892089218922892389248925892689278928892989308931893289338934893589368937893889398940894189428943894489458946894789488949895089518952895389548955895689578958895989608961896289638964896589668967896889698970897189728973897489758976897789788979898089818982898389848985898689878988898989908991899289938994899589968997899889999000900190029003900490059006900790089009901090119012901390149015901690179018901990209021902290239024902590269027902890299030903190329033903490359036903790389039904090419042904390449045904690479048904990509051905290539054905590569057905890599060906190629063906490659066906790689069907090719072907390749075907690779078907990809081908290839084908590869087908890899090909190929093909490959096909790989099910091019102910391049105910691079108910991109111911291139114911591169117911891199120912191229123912491259126912791289129913091319132913391349135913691379138913991409141914291439144914591469147914891499150915191529153915491559156915791589159916091619162916391649165916691679168916991709171917291739174917591769177917891799180918191829183918491859186918791889189919091919192919391949195919691979198919992009201920292039204920592069207920892099210921192129213921492159216921792189219922092219222922392249225922692279228922992309231923292339234923592369237923892399240924192429243924492459246924792489249925092519252925392549255925692579258925992609261926292639264926592669267926892699270927192729273927492759276927
7927892799280928192829283928492859286928792889289929092919292929392949295929692979298929993009301930293039304930593069307930893099310931193129313931493159316931793189319932093219322932393249325932693279328932993309331933293339334933593369337933893399340934193429343934493459346934793489349935093519352935393549355935693579358935993609361936293639364936593669367936893699370937193729373937493759376937793789379938093819382938393849385938693879388938993909391939293939394939593969397939893999400940194029403940494059406940794089409941094119412941394149415941694179418941994209421942294239424942594269427942894299430943194329433943494359436943794389439944094419442944394449445944694479448944994509451945294539454945594569457945894599460946194629463946494659466946794689469947094719472947394749475947694779478947994809481948294839484948594869487948894899490949194929493949494959496949794989499950095019502950395049505950695079508950995109511951295139514951595169517951895199520952195229523952495259526952795289529953095319532953395349535953695379538953995409541954295439544954595469547954895499550955195529553955495559556955795589559956095619562956395649565956695679568956995709571957295739574957595769577957895799580958195829583958495859586958795889589959095919592959395949595959695979598959996009601960296039604960596069607960896099610961196129613961496159616961796189619962096219622962396249625962696279628962996309631963296339634963596369637963896399640964196429643964496459646964796489649965096519652965396549655965696579658965996609661966296639664966596669667966896699670967196729673967496759676967796789679968096819682968396849685968696879688968996909691969296939694969596969697969896999700970197029703970497059706970797089709971097119712971397149715971697179718971997209721972297239724972597269727972897299730973197329733973497359736973797389739974097419742974397449745974697479748974997509751975297539754975597569757
  1. #include "sqlite-vec.h"
  2. #include <assert.h>
  3. #include <errno.h>
  4. #include <float.h>
  5. #include <inttypes.h>
  6. #include <limits.h>
  7. #include <math.h>
  8. #include <stdbool.h>
  9. #include <stdint.h>
  10. #include <stdlib.h>
  11. #include <string.h>
  12. #ifndef SQLITE_VEC_OMIT_FS
  13. #include <stdio.h>
  14. #endif
  15. #ifndef SQLITE_CORE
  16. #include "sqlite3ext.h"
  17. SQLITE_EXTENSION_INIT1
  18. #else
  19. #include "sqlite3.h"
  20. #endif
  21. #ifndef UINT32_TYPE
  22. #ifdef HAVE_UINT32_T
  23. #define UINT32_TYPE uint32_t
  24. #else
  25. #define UINT32_TYPE unsigned int
  26. #endif
  27. #endif
  28. #ifndef UINT16_TYPE
  29. #ifdef HAVE_UINT16_T
  30. #define UINT16_TYPE uint16_t
  31. #else
  32. #define UINT16_TYPE unsigned short int
  33. #endif
  34. #endif
  35. #ifndef INT16_TYPE
  36. #ifdef HAVE_INT16_T
  37. #define INT16_TYPE int16_t
  38. #else
  39. #define INT16_TYPE short int
  40. #endif
  41. #endif
  42. #ifndef UINT8_TYPE
  43. #ifdef HAVE_UINT8_T
  44. #define UINT8_TYPE uint8_t
  45. #else
  46. #define UINT8_TYPE unsigned char
  47. #endif
  48. #endif
  49. #ifndef INT8_TYPE
  50. #ifdef HAVE_INT8_T
  51. #define INT8_TYPE int8_t
  52. #else
  53. #define INT8_TYPE signed char
  54. #endif
  55. #endif
  56. #ifndef LONGDOUBLE_TYPE
  57. #define LONGDOUBLE_TYPE long double
  58. #endif
  59. #ifndef _WIN32
  60. #ifndef __EMSCRIPTEN__
  61. #ifndef __COSMOPOLITAN__
  62. #ifndef __wasi__
  63. typedef u_int8_t uint8_t;
  64. typedef u_int16_t uint16_t;
  65. typedef u_int64_t uint64_t;
  66. #endif
  67. #endif
  68. #endif
  69. #endif
  70. typedef int8_t i8;
  71. typedef uint8_t u8;
  72. typedef int16_t i16;
  73. typedef int32_t i32;
  74. typedef sqlite3_int64 i64;
  75. typedef uint32_t u32;
  76. typedef uint64_t u64;
  77. typedef float f32;
  78. typedef size_t usize;
  79. #ifndef UNUSED_PARAMETER
  80. #define UNUSED_PARAMETER(X) (void)(X)
  81. #endif
  82. // sqlite3_vtab_in() was added in SQLite version 3.38 (2022-02-22)
  83. // https://www.sqlite.org/changes.html#version_3_38_0
  84. #if SQLITE_VERSION_NUMBER >= 3038000
  85. #define COMPILER_SUPPORTS_VTAB_IN 1
  86. #endif
  87. #ifndef SQLITE_SUBTYPE
  88. #define SQLITE_SUBTYPE 0x000100000
  89. #endif
  90. #ifndef SQLITE_RESULT_SUBTYPE
  91. #define SQLITE_RESULT_SUBTYPE 0x001000000
  92. #endif
  93. #ifndef SQLITE_INDEX_CONSTRAINT_LIMIT
  94. #define SQLITE_INDEX_CONSTRAINT_LIMIT 73
  95. #endif
  96. #ifndef SQLITE_INDEX_CONSTRAINT_OFFSET
  97. #define SQLITE_INDEX_CONSTRAINT_OFFSET 74
  98. #endif
  99. #define countof(x) (sizeof(x) / sizeof((x)[0]))
  100. #define min(a, b) (((a) <= (b)) ? (a) : (b))
  // Tag identifying the element type of a vector value.
  // The three tags are consecutive integers starting at 223.
  enum VectorElementType {
    // clang-format off
    SQLITE_VEC_ELEMENT_TYPE_FLOAT32 = 223,
    SQLITE_VEC_ELEMENT_TYPE_BIT     = 224,
    SQLITE_VEC_ELEMENT_TYPE_INT8    = 225,
    // clang-format on
  };
  108. #ifdef SQLITE_VEC_ENABLE_AVX
  109. #include <immintrin.h>
  110. #define PORTABLE_ALIGN32 __attribute__((aligned(32)))
  111. #define PORTABLE_ALIGN64 __attribute__((aligned(64)))
  112. static f32 l2_sqr_float_avx(const void *pVect1v, const void *pVect2v,
  113. const void *qty_ptr) {
  114. f32 *pVect1 = (f32 *)pVect1v;
  115. f32 *pVect2 = (f32 *)pVect2v;
  116. size_t qty = *((size_t *)qty_ptr);
  117. f32 PORTABLE_ALIGN32 TmpRes[8];
  118. size_t qty16 = qty >> 4;
  119. const f32 *pEnd1 = pVect1 + (qty16 << 4);
  120. __m256 diff, v1, v2;
  121. __m256 sum = _mm256_set1_ps(0);
  122. while (pVect1 < pEnd1) {
  123. v1 = _mm256_loadu_ps(pVect1);
  124. pVect1 += 8;
  125. v2 = _mm256_loadu_ps(pVect2);
  126. pVect2 += 8;
  127. diff = _mm256_sub_ps(v1, v2);
  128. sum = _mm256_add_ps(sum, _mm256_mul_ps(diff, diff));
  129. v1 = _mm256_loadu_ps(pVect1);
  130. pVect1 += 8;
  131. v2 = _mm256_loadu_ps(pVect2);
  132. pVect2 += 8;
  133. diff = _mm256_sub_ps(v1, v2);
  134. sum = _mm256_add_ps(sum, _mm256_mul_ps(diff, diff));
  135. }
  136. _mm256_store_ps(TmpRes, sum);
  137. return sqrt(TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3] + TmpRes[4] +
  138. TmpRes[5] + TmpRes[6] + TmpRes[7]);
  139. }
  140. #endif
  141. #ifdef SQLITE_VEC_ENABLE_NEON
  142. #include <arm_neon.h>
  143. #define PORTABLE_ALIGN32 __attribute__((aligned(32)))
  144. // thx https://github.com/nmslib/hnswlib/pull/299/files
  145. static f32 l2_sqr_float_neon(const void *pVect1v, const void *pVect2v,
  146. const void *qty_ptr) {
  147. f32 *pVect1 = (f32 *)pVect1v;
  148. f32 *pVect2 = (f32 *)pVect2v;
  149. size_t qty = *((size_t *)qty_ptr);
  150. size_t qty16 = qty >> 4;
  151. const f32 *pEnd1 = pVect1 + (qty16 << 4);
  152. float32x4_t diff, v1, v2;
  153. float32x4_t sum0 = vdupq_n_f32(0);
  154. float32x4_t sum1 = vdupq_n_f32(0);
  155. float32x4_t sum2 = vdupq_n_f32(0);
  156. float32x4_t sum3 = vdupq_n_f32(0);
  157. while (pVect1 < pEnd1) {
  158. v1 = vld1q_f32(pVect1);
  159. pVect1 += 4;
  160. v2 = vld1q_f32(pVect2);
  161. pVect2 += 4;
  162. diff = vsubq_f32(v1, v2);
  163. sum0 = vfmaq_f32(sum0, diff, diff);
  164. v1 = vld1q_f32(pVect1);
  165. pVect1 += 4;
  166. v2 = vld1q_f32(pVect2);
  167. pVect2 += 4;
  168. diff = vsubq_f32(v1, v2);
  169. sum1 = vfmaq_f32(sum1, diff, diff);
  170. v1 = vld1q_f32(pVect1);
  171. pVect1 += 4;
  172. v2 = vld1q_f32(pVect2);
  173. pVect2 += 4;
  174. diff = vsubq_f32(v1, v2);
  175. sum2 = vfmaq_f32(sum2, diff, diff);
  176. v1 = vld1q_f32(pVect1);
  177. pVect1 += 4;
  178. v2 = vld1q_f32(pVect2);
  179. pVect2 += 4;
  180. diff = vsubq_f32(v1, v2);
  181. sum3 = vfmaq_f32(sum3, diff, diff);
  182. }
  183. f32 sum_scalar =
  184. vaddvq_f32(vaddq_f32(vaddq_f32(sum0, sum1), vaddq_f32(sum2, sum3)));
  185. const f32 *pEnd2 = pVect1 + (qty - (qty16 << 4));
  186. while (pVect1 < pEnd2) {
  187. f32 diff = *pVect1 - *pVect2;
  188. sum_scalar += diff * diff;
  189. pVect1++;
  190. pVect2++;
  191. }
  192. return sqrt(sum_scalar);
  193. }
  194. static f32 l2_sqr_int8_neon(const void *pVect1v, const void *pVect2v,
  195. const void *qty_ptr) {
  196. i8 *pVect1 = (i8 *)pVect1v;
  197. i8 *pVect2 = (i8 *)pVect2v;
  198. size_t qty = *((size_t *)qty_ptr);
  199. const i8 *pEnd1 = pVect1 + qty;
  200. i32 sum_scalar = 0;
  201. while (pVect1 < pEnd1 - 7) {
  202. // loading 8 at a time
  203. int8x8_t v1 = vld1_s8(pVect1);
  204. int8x8_t v2 = vld1_s8(pVect2);
  205. pVect1 += 8;
  206. pVect2 += 8;
  207. // widen to protect against overflow
  208. int16x8_t v1_wide = vmovl_s8(v1);
  209. int16x8_t v2_wide = vmovl_s8(v2);
  210. int16x8_t diff = vsubq_s16(v1_wide, v2_wide);
  211. int16x8_t squared_diff = vmulq_s16(diff, diff);
  212. int32x4_t sum = vpaddlq_s16(squared_diff);
  213. sum_scalar += vgetq_lane_s32(sum, 0) + vgetq_lane_s32(sum, 1) +
  214. vgetq_lane_s32(sum, 2) + vgetq_lane_s32(sum, 3);
  215. }
  216. // handle leftovers
  217. while (pVect1 < pEnd1) {
  218. i16 diff = (i16)*pVect1 - (i16)*pVect2;
  219. sum_scalar += diff * diff;
  220. pVect1++;
  221. pVect2++;
  222. }
  223. return sqrtf(sum_scalar);
  224. }
  225. static i32 l1_int8_neon(const void *pVect1v, const void *pVect2v,
  226. const void *qty_ptr) {
  227. i8 *pVect1 = (i8 *)pVect1v;
  228. i8 *pVect2 = (i8 *)pVect2v;
  229. size_t qty = *((size_t *)qty_ptr);
  230. const int8_t *pEnd1 = pVect1 + qty;
  231. int32x4_t acc1 = vdupq_n_s32(0);
  232. int32x4_t acc2 = vdupq_n_s32(0);
  233. int32x4_t acc3 = vdupq_n_s32(0);
  234. int32x4_t acc4 = vdupq_n_s32(0);
  235. while (pVect1 < pEnd1 - 63) {
  236. int8x16_t v1 = vld1q_s8(pVect1);
  237. int8x16_t v2 = vld1q_s8(pVect2);
  238. int8x16_t diff1 = vabdq_s8(v1, v2);
  239. acc1 = vaddq_s32(acc1, vpaddlq_u16(vpaddlq_u8(diff1)));
  240. v1 = vld1q_s8(pVect1 + 16);
  241. v2 = vld1q_s8(pVect2 + 16);
  242. int8x16_t diff2 = vabdq_s8(v1, v2);
  243. acc2 = vaddq_s32(acc2, vpaddlq_u16(vpaddlq_u8(diff2)));
  244. v1 = vld1q_s8(pVect1 + 32);
  245. v2 = vld1q_s8(pVect2 + 32);
  246. int8x16_t diff3 = vabdq_s8(v1, v2);
  247. acc3 = vaddq_s32(acc3, vpaddlq_u16(vpaddlq_u8(diff3)));
  248. v1 = vld1q_s8(pVect1 + 48);
  249. v2 = vld1q_s8(pVect2 + 48);
  250. int8x16_t diff4 = vabdq_s8(v1, v2);
  251. acc4 = vaddq_s32(acc4, vpaddlq_u16(vpaddlq_u8(diff4)));
  252. pVect1 += 64;
  253. pVect2 += 64;
  254. }
  255. while (pVect1 < pEnd1 - 15) {
  256. int8x16_t v1 = vld1q_s8(pVect1);
  257. int8x16_t v2 = vld1q_s8(pVect2);
  258. int8x16_t diff = vabdq_s8(v1, v2);
  259. acc1 = vaddq_s32(acc1, vpaddlq_u16(vpaddlq_u8(diff)));
  260. pVect1 += 16;
  261. pVect2 += 16;
  262. }
  263. int32x4_t acc = vaddq_s32(vaddq_s32(acc1, acc2), vaddq_s32(acc3, acc4));
  264. int32_t sum = 0;
  265. while (pVect1 < pEnd1) {
  266. int32_t diff = abs((int32_t)*pVect1 - (int32_t)*pVect2);
  267. sum += diff;
  268. pVect1++;
  269. pVect2++;
  270. }
  271. return vaddvq_s32(acc) + sum;
  272. }
  273. static double l1_f32_neon(const void *pVect1v, const void *pVect2v,
  274. const void *qty_ptr) {
  275. f32 *pVect1 = (f32 *)pVect1v;
  276. f32 *pVect2 = (f32 *)pVect2v;
  277. size_t qty = *((size_t *)qty_ptr);
  278. const f32 *pEnd1 = pVect1 + qty;
  279. float64x2_t acc = vdupq_n_f64(0);
  280. while (pVect1 < pEnd1 - 3) {
  281. float32x4_t v1 = vld1q_f32(pVect1);
  282. float32x4_t v2 = vld1q_f32(pVect2);
  283. pVect1 += 4;
  284. pVect2 += 4;
  285. // f32x4 -> f64x2 pad for overflow
  286. float64x2_t low_diff = vabdq_f64(vcvt_f64_f32(vget_low_f32(v1)),
  287. vcvt_f64_f32(vget_low_f32(v2)));
  288. float64x2_t high_diff =
  289. vabdq_f64(vcvt_high_f64_f32(v1), vcvt_high_f64_f32(v2));
  290. acc = vaddq_f64(acc, vaddq_f64(low_diff, high_diff));
  291. }
  292. double sum = 0;
  293. while (pVect1 < pEnd1) {
  294. sum += fabs((double)*pVect1 - (double)*pVect2);
  295. pVect1++;
  296. pVect2++;
  297. }
  298. return vaddvq_f64(acc) + sum;
  299. }
  300. #endif
  301. static f32 l2_sqr_float(const void *pVect1v, const void *pVect2v,
  302. const void *qty_ptr) {
  303. f32 *pVect1 = (f32 *)pVect1v;
  304. f32 *pVect2 = (f32 *)pVect2v;
  305. size_t qty = *((size_t *)qty_ptr);
  306. f32 res = 0;
  307. for (size_t i = 0; i < qty; i++) {
  308. f32 t = *pVect1 - *pVect2;
  309. pVect1++;
  310. pVect2++;
  311. res += t * t;
  312. }
  313. return sqrt(res);
  314. }
  315. static f32 l2_sqr_int8(const void *pA, const void *pB, const void *pD) {
  316. i8 *a = (i8 *)pA;
  317. i8 *b = (i8 *)pB;
  318. size_t d = *((size_t *)pD);
  319. f32 res = 0;
  320. for (size_t i = 0; i < d; i++) {
  321. f32 t = *a - *b;
  322. a++;
  323. b++;
  324. res += t * t;
  325. }
  326. return sqrt(res);
  327. }
  328. static f32 distance_l2_sqr_float(const void *a, const void *b, const void *d) {
  329. #ifdef SQLITE_VEC_ENABLE_NEON
  330. if ((*(const size_t *)d) > 16) {
  331. return l2_sqr_float_neon(a, b, d);
  332. }
  333. #endif
  334. #ifdef SQLITE_VEC_ENABLE_AVX
  335. if (((*(const size_t *)d) % 16 == 0)) {
  336. return l2_sqr_float_avx(a, b, d);
  337. }
  338. #endif
  339. return l2_sqr_float(a, b, d);
  340. }
  341. static f32 distance_l2_sqr_int8(const void *a, const void *b, const void *d) {
  342. #ifdef SQLITE_VEC_ENABLE_NEON
  343. if ((*(const size_t *)d) > 7) {
  344. return l2_sqr_int8_neon(a, b, d);
  345. }
  346. #endif
  347. return l2_sqr_int8(a, b, d);
  348. }
  349. static i32 l1_int8(const void *pA, const void *pB, const void *pD) {
  350. i8 *a = (i8 *)pA;
  351. i8 *b = (i8 *)pB;
  352. size_t d = *((size_t *)pD);
  353. i32 res = 0;
  354. for (size_t i = 0; i < d; i++) {
  355. res += abs(*a - *b);
  356. a++;
  357. b++;
  358. }
  359. return res;
  360. }
  361. static i32 distance_l1_int8(const void *a, const void *b, const void *d) {
  362. #ifdef SQLITE_VEC_ENABLE_NEON
  363. if ((*(const size_t *)d) > 15) {
  364. return l1_int8_neon(a, b, d);
  365. }
  366. #endif
  367. return l1_int8(a, b, d);
  368. }
  369. static double l1_f32(const void *pA, const void *pB, const void *pD) {
  370. f32 *a = (f32 *)pA;
  371. f32 *b = (f32 *)pB;
  372. size_t d = *((size_t *)pD);
  373. double res = 0;
  374. for (size_t i = 0; i < d; i++) {
  375. res += fabs((double)*a - (double)*b);
  376. a++;
  377. b++;
  378. }
  379. return res;
  380. }
  // Manhattan distance for float32 vectors; `d` points to a size_t dimension
  // count. On NEON builds, vectors with more than 3 dimensions use the NEON
  // kernel; everything else uses the portable scalar loop.
  static double distance_l1_f32(const void *a, const void *b, const void *d) {
    const size_t dims = *(const size_t *)d;
    (void)dims; // unused when NEON is not compiled in
  #ifdef SQLITE_VEC_ENABLE_NEON
    if (dims > 3) {
      return l1_f32_neon(a, b, d);
    }
  #endif
    return l1_f32(a, b, d);
  }
  389. static f32 distance_cosine_float(const void *pVect1v, const void *pVect2v,
  390. const void *qty_ptr) {
  391. f32 *pVect1 = (f32 *)pVect1v;
  392. f32 *pVect2 = (f32 *)pVect2v;
  393. size_t qty = *((size_t *)qty_ptr);
  394. f32 dot = 0;
  395. f32 aMag = 0;
  396. f32 bMag = 0;
  397. for (size_t i = 0; i < qty; i++) {
  398. dot += *pVect1 * *pVect2;
  399. aMag += *pVect1 * *pVect1;
  400. bMag += *pVect2 * *pVect2;
  401. pVect1++;
  402. pVect2++;
  403. }
  404. return 1 - (dot / (sqrt(aMag) * sqrt(bMag)));
  405. }
  406. static f32 distance_cosine_int8(const void *pA, const void *pB,
  407. const void *pD) {
  408. i8 *a = (i8 *)pA;
  409. i8 *b = (i8 *)pB;
  410. size_t d = *((size_t *)pD);
  411. f32 dot = 0;
  412. f32 aMag = 0;
  413. f32 bMag = 0;
  414. for (size_t i = 0; i < d; i++) {
  415. dot += *a * *b;
  416. aMag += *a * *a;
  417. bMag += *b * *b;
  418. a++;
  419. b++;
  420. }
  421. return 1 - (dot / (sqrt(aMag) * sqrt(bMag)));
  422. }
  423. // https://github.com/facebookresearch/faiss/blob/77e2e79cd0a680adc343b9840dd865da724c579e/faiss/utils/hamming_distance/common.h#L34
  424. static u8 hamdist_table[256] = {
  425. 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4,
  426. 2, 3, 3, 4, 3, 4, 4, 5, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
  427. 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 1, 2, 2, 3, 2, 3, 3, 4,
  428. 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
  429. 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6,
  430. 4, 5, 5, 6, 5, 6, 6, 7, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
  431. 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5,
  432. 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
  433. 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6,
  434. 4, 5, 5, 6, 5, 6, 6, 7, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
  435. 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8};
  436. static f32 distance_hamming_u8(u8 *a, u8 *b, size_t n) {
  437. int same = 0;
  438. for (unsigned long i = 0; i < n; i++) {
  439. same += hamdist_table[a[i] ^ b[i]];
  440. }
  441. return (f32)same;
  442. }
  443. #ifdef _MSC_VER
  444. #if !defined(__clang__) && (defined(_M_ARM) || defined(_M_ARM64))
  445. // From
  446. // https://github.com/ngtcp2/ngtcp2/blob/b64f1e77b5e0d880b93d31f474147fae4a1d17cc/lib/ngtcp2_ringbuf.c,
  447. // line 34-43
  448. static unsigned int __builtin_popcountl(unsigned int x) {
  449. unsigned int c = 0;
  450. for (; x; ++c) {
  451. x &= x - 1;
  452. }
  453. return c;
  454. }
  455. #else
  456. #include <intrin.h>
  457. /*x86ÉÏûÓÐ__popcnt64£¬»á±¨´í£¬¸ÄΪ__popcnt*/
  458. #ifdef WIN32
  459. #define __builtin_popcountl __popcnt
  460. #else
  461. #define __builtin_popcountl __popcnt64
  462. #endif
  463. #endif
  464. #endif
  465. static f32 distance_hamming_u64(u64 *a, u64 *b, size_t n) {
  466. int same = 0;
  467. for (unsigned long i = 0; i < n; i++) {
  468. same += __builtin_popcountl(a[i] ^ b[i]);
  469. }
  470. return (f32)same;
  471. }
  472. /**
  473. * @brief Calculate the hamming distance between two bitvectors.
  474. *
  475. * @param a - first bitvector, MUST have d dimensions
  476. * @param b - second bitvector, MUST have d dimensions
  477. * @param d - pointer to size_t, MUST be divisible by CHAR_BIT
  478. * @return f32
  479. */
  480. static f32 distance_hamming(const void *a, const void *b, const void *d) {
  481. size_t dimensions = *((size_t *)d);
  482. if ((dimensions % 64) == 0) {
  483. return distance_hamming_u64((u64 *)a, (u64 *)b, dimensions / 8 / CHAR_BIT);
  484. }
  485. return distance_hamming_u8((u8 *)a, (u8 *)b, dimensions / CHAR_BIT);
  486. }
  // from SQLite source:
  // https://github.com/sqlite/sqlite/blob/a509a90958ddb234d1785ed7801880ccb18b497e/src/json.c#L153
  //
  // Lookup table indexed by byte value: 1 for the four whitespace bytes
  // (tab, line feed, carriage return, space), 0 for every other byte.
  // Entries not listed below are zero-initialized.
  static const char vecJsonIsSpaceX[256] = {
      [0x09] = 1, /* '\t' */
      [0x0A] = 1, /* '\n' */
      [0x0D] = 1, /* '\r' */
      [0x20] = 1, /* ' '  */
  };
  // The argument is parenthesized so the cast applies to the whole expression,
  // keeping the index inside 0..255 for any argument.
  #define vecJsonIsspace(x) (vecJsonIsSpaceX[(unsigned char)(x)])
  // Signature of a function that releases a vector buffer.
  typedef void (*vector_cleanup)(void *p);

  // Cleanup callback that does nothing with the pointer it is given.
  void vector_cleanup_noop(void *_) {
    UNUSED_PARAMETER(_);
  }

  // Subtype value used by this file to mark JSON values.
  #define JSON_SUBTYPE 74
  507. void vtab_set_error(sqlite3_vtab *pVTab, const char *zFormat, ...) {
  508. va_list args;
  509. sqlite3_free(pVTab->zErrMsg);
  510. va_start(args, zFormat);
  511. pVTab->zErrMsg = sqlite3_vmprintf(zFormat, args);
  512. va_end(args);
  513. }
  // Growable array of fixed-size elements stored back to back in one buffer.
  struct Array {
    size_t element_size; // size in bytes of one element
    size_t length;       // number of elements currently stored
    size_t capacity;     // number of elements the buffer can hold
    void *z;             // element storage
  };
  520. /**
  521. * @brief Initial an array with the given element size and capacity.
  522. *
  523. * @param array
  524. * @param element_size
  525. * @param init_capacity
  526. * @return SQLITE_OK on success, error code on failure. Only error is
  527. * SQLITE_NOMEM
  528. */
  529. int array_init(struct Array *array, size_t element_size, size_t init_capacity) {
  530. int sz = element_size * init_capacity;
  531. void *z = sqlite3_malloc(sz);
  532. if (!z) {
  533. return SQLITE_NOMEM;
  534. }
  535. memset(z, 0, sz);
  536. array->element_size = element_size;
  537. array->length = 0;
  538. array->capacity = init_capacity;
  539. array->z = z;
  540. return SQLITE_OK;
  541. }
  542. int array_append(struct Array *array, const void *element) {
  543. if (array->length == array->capacity) {
  544. size_t new_capacity = array->capacity * 2 + 100;
  545. void *z = sqlite3_realloc64(array->z, array->element_size * new_capacity);
  546. if (z) {
  547. array->capacity = new_capacity;
  548. array->z = z;
  549. } else {
  550. return SQLITE_NOMEM;
  551. }
  552. }
  553. memcpy(&((unsigned char *)array->z)[array->length * array->element_size],
  554. element, array->element_size);
  555. array->length++;
  556. return SQLITE_OK;
  557. }
  558. void array_cleanup(struct Array *array) {
  559. if (!array)
  560. return;
  561. array->element_size = 0;
  562. array->length = 0;
  563. array->capacity = 0;
  564. sqlite3_free(array->z);
  565. array->z = NULL;
  566. }
  567. char *vector_subtype_name(int subtype) {
  568. switch (subtype) {
  569. case SQLITE_VEC_ELEMENT_TYPE_FLOAT32:
  570. return "float32";
  571. case SQLITE_VEC_ELEMENT_TYPE_INT8:
  572. return "int8";
  573. case SQLITE_VEC_ELEMENT_TYPE_BIT:
  574. return "bit";
  575. }
  576. return "";
  577. }
  578. char *type_name(int type) {
  579. switch (type) {
  580. case SQLITE_INTEGER:
  581. return "INTEGER";
  582. case SQLITE_BLOB:
  583. return "BLOB";
  584. case SQLITE_TEXT:
  585. return "TEXT";
  586. case SQLITE_FLOAT:
  587. return "FLOAT";
  588. case SQLITE_NULL:
  589. return "NULL";
  590. }
  591. return "";
  592. }
  593. typedef void (*fvec_cleanup)(f32 *vector);
  594. void fvec_cleanup_noop(f32 *_) { UNUSED_PARAMETER(_); }
  595. static int fvec_from_value(sqlite3_value *value, f32 **vector,
  596. size_t *dimensions, fvec_cleanup *cleanup,
  597. char **pzErr) {
  598. int value_type = sqlite3_value_type(value);
  599. if (value_type == SQLITE_BLOB) {
  600. const void *blob = sqlite3_value_blob(value);
  601. int bytes = sqlite3_value_bytes(value);
  602. if (bytes == 0) {
  603. *pzErr = sqlite3_mprintf("zero-length vectors are not supported.");
  604. return SQLITE_ERROR;
  605. }
  606. if ((bytes % sizeof(f32)) != 0) {
  607. *pzErr = sqlite3_mprintf("invalid float32 vector BLOB length. Must be "
  608. "divisible by %d, found %d",
  609. sizeof(f32), bytes);
  610. return SQLITE_ERROR;
  611. }
  612. *vector = (f32 *)blob;
  613. *dimensions = bytes / sizeof(f32);
  614. *cleanup = fvec_cleanup_noop;
  615. return SQLITE_OK;
  616. }
  617. if (value_type == SQLITE_TEXT) {
  618. const char *source = (const char *)sqlite3_value_text(value);
  619. int source_len = sqlite3_value_bytes(value);
  620. if (source_len == 0) {
  621. *pzErr = sqlite3_mprintf("zero-length vectors are not supported.");
  622. return SQLITE_ERROR;
  623. }
  624. int i = 0;
  625. struct Array x;
  626. int rc = array_init(&x, sizeof(f32), ceil(source_len / 2.0));
  627. if (rc != SQLITE_OK) {
  628. return rc;
  629. }
  630. // advance leading whitespace to first '['
  631. while (i < source_len) {
  632. if (vecJsonIsspace(source[i])) {
  633. i++;
  634. continue;
  635. }
  636. if (source[i] == '[') {
  637. break;
  638. }
  639. *pzErr = sqlite3_mprintf(
  640. "JSON array parsing error: Input does not start with '['");
  641. array_cleanup(&x);
  642. return SQLITE_ERROR;
  643. }
  644. if (source[i] != '[') {
  645. *pzErr = sqlite3_mprintf(
  646. "JSON array parsing error: Input does not start with '['");
  647. array_cleanup(&x);
  648. return SQLITE_ERROR;
  649. }
  650. int offset = i + 1;
  651. while (offset < source_len) {
  652. char *ptr = (char *)&source[offset];
  653. char *endptr;
  654. errno = 0;
  655. double result = strtod(ptr, &endptr);
  656. if ((errno != 0 && result == 0) // some interval error?
  657. || (errno == ERANGE &&
  658. (result == HUGE_VAL || result == -HUGE_VAL)) // too big / smalls
  659. ) {
  660. sqlite3_free(x.z);
  661. *pzErr = sqlite3_mprintf("JSON parsing error");
  662. return SQLITE_ERROR;
  663. }
  664. if (endptr == ptr) {
  665. if (*ptr != ']') {
  666. sqlite3_free(x.z);
  667. *pzErr = sqlite3_mprintf("JSON parsing error");
  668. return SQLITE_ERROR;
  669. }
  670. goto done;
  671. }
  672. f32 res = (f32)result;
  673. array_append(&x, (const void *)&res);
  674. offset += (endptr - ptr);
  675. while (offset < source_len) {
  676. if (vecJsonIsspace(source[offset])) {
  677. offset++;
  678. continue;
  679. }
  680. if (source[offset] == ',') {
  681. offset++;
  682. continue;
  683. }
  684. if (source[offset] == ']')
  685. goto done;
  686. break;
  687. }
  688. }
  689. done:
  690. if (x.length > 0) {
  691. *vector = (f32 *)x.z;
  692. *dimensions = x.length;
  693. *cleanup = (fvec_cleanup)sqlite3_free;
  694. return SQLITE_OK;
  695. }
  696. sqlite3_free(x.z);
  697. *pzErr = sqlite3_mprintf("zero-length vectors are not supported.");
  698. return SQLITE_ERROR;
  699. }
  700. *pzErr = sqlite3_mprintf(
  701. "Input must have type BLOB (compact format) or TEXT (JSON), found %s",
  702. type_name(value_type));
  703. return SQLITE_ERROR;
  704. }
  705. static int bitvec_from_value(sqlite3_value *value, u8 **vector,
  706. size_t *dimensions, vector_cleanup *cleanup,
  707. char **pzErr) {
  708. int value_type = sqlite3_value_type(value);
  709. if (value_type == SQLITE_BLOB) {
  710. const void *blob = sqlite3_value_blob(value);
  711. int bytes = sqlite3_value_bytes(value);
  712. if (bytes == 0) {
  713. *pzErr = sqlite3_mprintf("zero-length vectors are not supported.");
  714. return SQLITE_ERROR;
  715. }
  716. *vector = (u8 *)blob;
  717. *dimensions = bytes * CHAR_BIT;
  718. *cleanup = vector_cleanup_noop;
  719. return SQLITE_OK;
  720. }
  721. *pzErr = sqlite3_mprintf("Unknown type for bitvector.");
  722. return SQLITE_ERROR;
  723. }
  724. static int int8_vec_from_value(sqlite3_value *value, i8 **vector,
  725. size_t *dimensions, vector_cleanup *cleanup,
  726. char **pzErr) {
  727. int value_type = sqlite3_value_type(value);
  728. if (value_type == SQLITE_BLOB) {
  729. const void *blob = sqlite3_value_blob(value);
  730. int bytes = sqlite3_value_bytes(value);
  731. if (bytes == 0) {
  732. *pzErr = sqlite3_mprintf("zero-length vectors are not supported.");
  733. return SQLITE_ERROR;
  734. }
  735. *vector = (i8 *)blob;
  736. *dimensions = bytes;
  737. *cleanup = vector_cleanup_noop;
  738. return SQLITE_OK;
  739. }
  740. if (value_type == SQLITE_TEXT) {
  741. const char *source = (const char *)sqlite3_value_text(value);
  742. int source_len = sqlite3_value_bytes(value);
  743. int i = 0;
  744. if (source_len == 0) {
  745. *pzErr = sqlite3_mprintf("zero-length vectors are not supported.");
  746. return SQLITE_ERROR;
  747. }
  748. struct Array x;
  749. int rc = array_init(&x, sizeof(i8), ceil(source_len / 2.0));
  750. if (rc != SQLITE_OK) {
  751. return rc;
  752. }
  753. // advance leading whitespace to first '['
  754. while (i < source_len) {
  755. if (vecJsonIsspace(source[i])) {
  756. i++;
  757. continue;
  758. }
  759. if (source[i] == '[') {
  760. break;
  761. }
  762. *pzErr = sqlite3_mprintf(
  763. "JSON array parsing error: Input does not start with '['");
  764. array_cleanup(&x);
  765. return SQLITE_ERROR;
  766. }
  767. if (source[i] != '[') {
  768. *pzErr = sqlite3_mprintf(
  769. "JSON array parsing error: Input does not start with '['");
  770. array_cleanup(&x);
  771. return SQLITE_ERROR;
  772. }
  773. int offset = i + 1;
  774. while (offset < source_len) {
  775. char *ptr = (char *)&source[offset];
  776. char *endptr;
  777. errno = 0;
  778. long result = strtol(ptr, &endptr, 10);
  779. if ((errno != 0 && result == 0) ||
  780. (errno == ERANGE && (result == LONG_MAX || result == LONG_MIN))) {
  781. sqlite3_free(x.z);
  782. *pzErr = sqlite3_mprintf("JSON parsing error");
  783. return SQLITE_ERROR;
  784. }
  785. if (endptr == ptr) {
  786. if (*ptr != ']') {
  787. sqlite3_free(x.z);
  788. *pzErr = sqlite3_mprintf("JSON parsing error");
  789. return SQLITE_ERROR;
  790. }
  791. goto done;
  792. }
  793. if (result < INT8_MIN || result > INT8_MAX) {
  794. sqlite3_free(x.z);
  795. *pzErr =
  796. sqlite3_mprintf("JSON parsing error: value out of range for int8");
  797. return SQLITE_ERROR;
  798. }
  799. i8 res = (i8)result;
  800. array_append(&x, (const void *)&res);
  801. offset += (endptr - ptr);
  802. while (offset < source_len) {
  803. if (vecJsonIsspace(source[offset])) {
  804. offset++;
  805. continue;
  806. }
  807. if (source[offset] == ',') {
  808. offset++;
  809. continue;
  810. }
  811. if (source[offset] == ']')
  812. goto done;
  813. break;
  814. }
  815. }
  816. done:
  817. if (x.length > 0) {
  818. *vector = (i8 *)x.z;
  819. *dimensions = x.length;
  820. *cleanup = (vector_cleanup)sqlite3_free;
  821. return SQLITE_OK;
  822. }
  823. sqlite3_free(x.z);
  824. *pzErr = sqlite3_mprintf("zero-length vectors are not supported.");
  825. return SQLITE_ERROR;
  826. }
  827. *pzErr = sqlite3_mprintf("Unknown type for int8 vector.");
  828. return SQLITE_ERROR;
  829. }
  830. /**
  831. * @brief Extract a vector from a sqlite3_value. Can be a float32, int8, or bit
  832. * vector.
  833. *
  834. * @param value: the sqlite3_value to read from.
  835. * @param vector: Output pointer to vector data.
  836. * @param dimensions: Output number of dimensions
  837. * @param dimensions: Output vector element type
  838. * @param cleanup
  839. * @param pzErrorMessage
  840. * @return int SQLITE_OK on success, error code otherwise
  841. */
  842. int vector_from_value(sqlite3_value *value, void **vector, size_t *dimensions,
  843. enum VectorElementType *element_type,
  844. vector_cleanup *cleanup, char **pzErrorMessage) {
  845. int subtype = sqlite3_value_subtype(value);
  846. if (!subtype || (subtype == SQLITE_VEC_ELEMENT_TYPE_FLOAT32) ||
  847. (subtype == JSON_SUBTYPE)) {
  848. int rc = fvec_from_value(value, (f32 **)vector, dimensions,
  849. (fvec_cleanup *)cleanup, pzErrorMessage);
  850. if (rc == SQLITE_OK) {
  851. *element_type = SQLITE_VEC_ELEMENT_TYPE_FLOAT32;
  852. }
  853. return rc;
  854. }
  855. if (subtype == SQLITE_VEC_ELEMENT_TYPE_BIT) {
  856. int rc = bitvec_from_value(value, (u8 **)vector, dimensions, cleanup,
  857. pzErrorMessage);
  858. if (rc == SQLITE_OK) {
  859. *element_type = SQLITE_VEC_ELEMENT_TYPE_BIT;
  860. }
  861. return rc;
  862. }
  863. if (subtype == SQLITE_VEC_ELEMENT_TYPE_INT8) {
  864. int rc = int8_vec_from_value(value, (i8 **)vector, dimensions, cleanup,
  865. pzErrorMessage);
  866. if (rc == SQLITE_OK) {
  867. *element_type = SQLITE_VEC_ELEMENT_TYPE_INT8;
  868. }
  869. return rc;
  870. }
  871. *pzErrorMessage = sqlite3_mprintf("Unknown subtype: %d", subtype);
  872. return SQLITE_ERROR;
  873. }
  874. int ensure_vector_match(sqlite3_value *aValue, sqlite3_value *bValue, void **a,
  875. void **b, enum VectorElementType *element_type,
  876. size_t *dimensions, vector_cleanup *outACleanup,
  877. vector_cleanup *outBCleanup, char **outError) {
  878. int rc;
  879. enum VectorElementType aType, bType;
  880. size_t aDims, bDims;
  881. char *error = NULL;
  882. vector_cleanup aCleanup, bCleanup;
  883. rc = vector_from_value(aValue, a, &aDims, &aType, &aCleanup, &error);
  884. if (rc != SQLITE_OK) {
  885. *outError = sqlite3_mprintf("Error reading 1st vector: %s", error);
  886. sqlite3_free(error);
  887. return SQLITE_ERROR;
  888. }
  889. rc = vector_from_value(bValue, b, &bDims, &bType, &bCleanup, &error);
  890. if (rc != SQLITE_OK) {
  891. *outError = sqlite3_mprintf("Error reading 2nd vector: %s", error);
  892. sqlite3_free(error);
  893. aCleanup(a);
  894. return SQLITE_ERROR;
  895. }
  896. if (aType != bType) {
  897. *outError =
  898. sqlite3_mprintf("Vector type mistmatch. First vector has type %s, "
  899. "while the second has type %s.",
  900. vector_subtype_name(aType), vector_subtype_name(bType));
  901. aCleanup(*a);
  902. bCleanup(*b);
  903. return SQLITE_ERROR;
  904. }
  905. if (aDims != bDims) {
  906. *outError = sqlite3_mprintf(
  907. "Vector dimension mistmatch. First vector has %ld dimensions, "
  908. "while the second has %ld dimensions.",
  909. aDims, bDims);
  910. aCleanup(*a);
  911. bCleanup(*b);
  912. return SQLITE_ERROR;
  913. }
  914. *element_type = aType;
  915. *dimensions = aDims;
  916. *outACleanup = aCleanup;
  917. *outBCleanup = bCleanup;
  918. return SQLITE_OK;
  919. }
  920. int _cmp(const void *a, const void *b) { return (*(i64 *)a - *(i64 *)b); }
  921. struct VecNpyFile {
  922. char *path;
  923. size_t pathLength;
  924. };
  925. #define SQLITE_VEC_NPY_FILE_NAME "vec0-npy-file"
  926. #ifndef SQLITE_VEC_OMIT_FS
  927. static void vec_npy_file(sqlite3_context *context, int argc,
  928. sqlite3_value **argv) {
  929. assert(argc == 1);
  930. char *path = (char *)sqlite3_value_text(argv[0]);
  931. size_t pathLength = sqlite3_value_bytes(argv[0]);
  932. struct VecNpyFile *f;
  933. f = sqlite3_malloc(sizeof(*f));
  934. if (!f) {
  935. sqlite3_result_error_nomem(context);
  936. return;
  937. }
  938. memset(f, 0, sizeof(*f));
  939. f->path = path;
  940. f->pathLength = pathLength;
  941. sqlite3_result_pointer(context, f, SQLITE_VEC_NPY_FILE_NAME, sqlite3_free);
  942. }
  943. #endif
  944. #pragma region scalar functions
  945. static void vec_f32(sqlite3_context *context, int argc, sqlite3_value **argv) {
  946. assert(argc == 1);
  947. int rc;
  948. f32 *vector = NULL;
  949. size_t dimensions;
  950. fvec_cleanup cleanup;
  951. char *errmsg;
  952. rc = fvec_from_value(argv[0], &vector, &dimensions, &cleanup, &errmsg);
  953. if (rc != SQLITE_OK) {
  954. sqlite3_result_error(context, errmsg, -1);
  955. sqlite3_free(errmsg);
  956. return;
  957. }
  958. sqlite3_result_blob(context, vector, dimensions * sizeof(f32),
  959. (void (*)(void *))cleanup);
  960. sqlite3_result_subtype(context, SQLITE_VEC_ELEMENT_TYPE_FLOAT32);
  961. }
  962. static void vec_bit(sqlite3_context *context, int argc, sqlite3_value **argv) {
  963. assert(argc == 1);
  964. int rc;
  965. u8 *vector;
  966. size_t dimensions;
  967. vector_cleanup cleanup;
  968. char *errmsg;
  969. rc = bitvec_from_value(argv[0], &vector, &dimensions, &cleanup, &errmsg);
  970. if (rc != SQLITE_OK) {
  971. sqlite3_result_error(context, errmsg, -1);
  972. sqlite3_free(errmsg);
  973. return;
  974. }
  975. sqlite3_result_blob(context, vector, dimensions / CHAR_BIT, SQLITE_TRANSIENT);
  976. sqlite3_result_subtype(context, SQLITE_VEC_ELEMENT_TYPE_BIT);
  977. cleanup(vector);
  978. }
  979. static void vec_int8(sqlite3_context *context, int argc, sqlite3_value **argv) {
  980. assert(argc == 1);
  981. int rc;
  982. i8 *vector;
  983. size_t dimensions;
  984. vector_cleanup cleanup;
  985. char *errmsg;
  986. rc = int8_vec_from_value(argv[0], &vector, &dimensions, &cleanup, &errmsg);
  987. if (rc != SQLITE_OK) {
  988. sqlite3_result_error(context, errmsg, -1);
  989. sqlite3_free(errmsg);
  990. return;
  991. }
  992. sqlite3_result_blob(context, vector, dimensions, SQLITE_TRANSIENT);
  993. sqlite3_result_subtype(context, SQLITE_VEC_ELEMENT_TYPE_INT8);
  994. cleanup(vector);
  995. }
  996. static void vec_length(sqlite3_context *context, int argc,
  997. sqlite3_value **argv) {
  998. assert(argc == 1);
  999. int rc;
  1000. void *vector;
  1001. size_t dimensions;
  1002. vector_cleanup cleanup;
  1003. char *errmsg;
  1004. enum VectorElementType elementType;
  1005. rc = vector_from_value(argv[0], &vector, &dimensions, &elementType, &cleanup,
  1006. &errmsg);
  1007. if (rc != SQLITE_OK) {
  1008. sqlite3_result_error(context, errmsg, -1);
  1009. sqlite3_free(errmsg);
  1010. return;
  1011. }
  1012. sqlite3_result_int64(context, dimensions);
  1013. cleanup(vector);
  1014. }
  1015. static void vec_distance_cosine(sqlite3_context *context, int argc,
  1016. sqlite3_value **argv) {
  1017. assert(argc == 2);
  1018. int rc;
  1019. void *a = NULL, *b = NULL;
  1020. size_t dimensions;
  1021. vector_cleanup aCleanup, bCleanup;
  1022. char *error;
  1023. enum VectorElementType elementType;
  1024. rc = ensure_vector_match(argv[0], argv[1], &a, &b, &elementType, &dimensions,
  1025. &aCleanup, &bCleanup, &error);
  1026. if (rc != SQLITE_OK) {
  1027. sqlite3_result_error(context, error, -1);
  1028. sqlite3_free(error);
  1029. return;
  1030. }
  1031. switch (elementType) {
  1032. case SQLITE_VEC_ELEMENT_TYPE_BIT: {
  1033. sqlite3_result_error(
  1034. context, "Cannot calculate cosine distance between two bitvectors.",
  1035. -1);
  1036. goto finish;
  1037. }
  1038. case SQLITE_VEC_ELEMENT_TYPE_FLOAT32: {
  1039. f32 result = distance_cosine_float(a, b, &dimensions);
  1040. sqlite3_result_double(context, result);
  1041. goto finish;
  1042. }
  1043. case SQLITE_VEC_ELEMENT_TYPE_INT8: {
  1044. f32 result = distance_cosine_int8(a, b, &dimensions);
  1045. sqlite3_result_double(context, result);
  1046. goto finish;
  1047. }
  1048. }
  1049. finish:
  1050. aCleanup(a);
  1051. bCleanup(b);
  1052. return;
  1053. }
  1054. static void vec_distance_l2(sqlite3_context *context, int argc,
  1055. sqlite3_value **argv) {
  1056. assert(argc == 2);
  1057. int rc;
  1058. void *a = NULL, *b = NULL;
  1059. size_t dimensions;
  1060. vector_cleanup aCleanup, bCleanup;
  1061. char *error;
  1062. enum VectorElementType elementType;
  1063. rc = ensure_vector_match(argv[0], argv[1], &a, &b, &elementType, &dimensions,
  1064. &aCleanup, &bCleanup, &error);
  1065. if (rc != SQLITE_OK) {
  1066. sqlite3_result_error(context, error, -1);
  1067. sqlite3_free(error);
  1068. return;
  1069. }
  1070. switch (elementType) {
  1071. case SQLITE_VEC_ELEMENT_TYPE_BIT: {
  1072. sqlite3_result_error(
  1073. context, "Cannot calculate L2 distance between two bitvectors.", -1);
  1074. goto finish;
  1075. }
  1076. case SQLITE_VEC_ELEMENT_TYPE_FLOAT32: {
  1077. f32 result = distance_l2_sqr_float(a, b, &dimensions);
  1078. sqlite3_result_double(context, result);
  1079. goto finish;
  1080. }
  1081. case SQLITE_VEC_ELEMENT_TYPE_INT8: {
  1082. f32 result = distance_l2_sqr_int8(a, b, &dimensions);
  1083. sqlite3_result_double(context, result);
  1084. goto finish;
  1085. }
  1086. }
  1087. finish:
  1088. aCleanup(a);
  1089. bCleanup(b);
  1090. return;
  1091. }
  1092. static void vec_distance_l1(sqlite3_context *context, int argc,
  1093. sqlite3_value **argv) {
  1094. assert(argc == 2);
  1095. int rc;
  1096. void *a, *b;
  1097. size_t dimensions;
  1098. vector_cleanup aCleanup, bCleanup;
  1099. char *error;
  1100. enum VectorElementType elementType;
  1101. rc = ensure_vector_match(argv[0], argv[1], &a, &b, &elementType, &dimensions,
  1102. &aCleanup, &bCleanup, &error);
  1103. if (rc != SQLITE_OK) {
  1104. sqlite3_result_error(context, error, -1);
  1105. sqlite3_free(error);
  1106. return;
  1107. }
  1108. switch (elementType) {
  1109. case SQLITE_VEC_ELEMENT_TYPE_BIT: {
  1110. sqlite3_result_error(
  1111. context, "Cannot calculate L1 distance between two bitvectors.", -1);
  1112. goto finish;
  1113. }
  1114. case SQLITE_VEC_ELEMENT_TYPE_FLOAT32: {
  1115. double result = distance_l1_f32(a, b, &dimensions);
  1116. sqlite3_result_double(context, result);
  1117. goto finish;
  1118. }
  1119. case SQLITE_VEC_ELEMENT_TYPE_INT8: {
  1120. i64 result = distance_l1_int8(a, b, &dimensions);
  1121. sqlite3_result_int(context, result);
  1122. goto finish;
  1123. }
  1124. }
  1125. finish:
  1126. aCleanup(a);
  1127. bCleanup(b);
  1128. return;
  1129. }
  1130. static void vec_distance_hamming(sqlite3_context *context, int argc,
  1131. sqlite3_value **argv) {
  1132. assert(argc == 2);
  1133. int rc;
  1134. void *a = NULL, *b = NULL;
  1135. size_t dimensions;
  1136. vector_cleanup aCleanup, bCleanup;
  1137. char *error;
  1138. enum VectorElementType elementType;
  1139. rc = ensure_vector_match(argv[0], argv[1], &a, &b, &elementType, &dimensions,
  1140. &aCleanup, &bCleanup, &error);
  1141. if (rc != SQLITE_OK) {
  1142. sqlite3_result_error(context, error, -1);
  1143. sqlite3_free(error);
  1144. return;
  1145. }
  1146. switch (elementType) {
  1147. case SQLITE_VEC_ELEMENT_TYPE_BIT: {
  1148. sqlite3_result_double(context, distance_hamming(a, b, &dimensions));
  1149. goto finish;
  1150. }
  1151. case SQLITE_VEC_ELEMENT_TYPE_FLOAT32: {
  1152. sqlite3_result_error(
  1153. context,
  1154. "Cannot calculate hamming distance between two float32 vectors.", -1);
  1155. goto finish;
  1156. }
  1157. case SQLITE_VEC_ELEMENT_TYPE_INT8: {
  1158. sqlite3_result_error(
  1159. context, "Cannot calculate hamming distance between two int8 vectors.",
  1160. -1);
  1161. goto finish;
  1162. }
  1163. }
  1164. finish:
  1165. aCleanup(a);
  1166. bCleanup(b);
  1167. return;
  1168. }
  1169. char *vec_type_name(enum VectorElementType elementType) {
  1170. switch (elementType) {
  1171. case SQLITE_VEC_ELEMENT_TYPE_FLOAT32:
  1172. return "float32";
  1173. case SQLITE_VEC_ELEMENT_TYPE_INT8:
  1174. return "int8";
  1175. case SQLITE_VEC_ELEMENT_TYPE_BIT:
  1176. return "bit";
  1177. }
  1178. return "";
  1179. }
  1180. static void vec_type(sqlite3_context *context, int argc, sqlite3_value **argv) {
  1181. assert(argc == 1);
  1182. void *vector;
  1183. size_t dimensions;
  1184. vector_cleanup cleanup;
  1185. char *pzError;
  1186. enum VectorElementType elementType;
  1187. int rc = vector_from_value(argv[0], &vector, &dimensions, &elementType,
  1188. &cleanup, &pzError);
  1189. if (rc != SQLITE_OK) {
  1190. sqlite3_result_error(context, pzError, -1);
  1191. sqlite3_free(pzError);
  1192. return;
  1193. }
  1194. sqlite3_result_text(context, vec_type_name(elementType), -1, SQLITE_STATIC);
  1195. cleanup(vector);
  1196. }
  1197. static void vec_quantize_binary(sqlite3_context *context, int argc,
  1198. sqlite3_value **argv) {
  1199. assert(argc == 1);
  1200. void *vector;
  1201. size_t dimensions;
  1202. vector_cleanup vectorCleanup;
  1203. char *pzError;
  1204. enum VectorElementType elementType;
  1205. int rc = vector_from_value(argv[0], &vector, &dimensions, &elementType,
  1206. &vectorCleanup, &pzError);
  1207. if (rc != SQLITE_OK) {
  1208. sqlite3_result_error(context, pzError, -1);
  1209. sqlite3_free(pzError);
  1210. return;
  1211. }
  1212. if (dimensions <= 0) {
  1213. sqlite3_result_error(context, "Zero length vectors are not supported.", -1);
  1214. goto cleanup;
  1215. return;
  1216. }
  1217. if ((dimensions % CHAR_BIT) != 0) {
  1218. sqlite3_result_error(
  1219. context,
  1220. "Binary quantization requires vectors with a length divisible by 8",
  1221. -1);
  1222. goto cleanup;
  1223. return;
  1224. }
  1225. int sz = dimensions / CHAR_BIT;
  1226. u8 *out = sqlite3_malloc(sz);
  1227. if (!out) {
  1228. sqlite3_result_error_code(context, SQLITE_NOMEM);
  1229. goto cleanup;
  1230. return;
  1231. }
  1232. memset(out, 0, sz);
  1233. switch (elementType) {
  1234. case SQLITE_VEC_ELEMENT_TYPE_FLOAT32: {
  1235. for (size_t i = 0; i < dimensions; i++) {
  1236. int res = ((f32 *)vector)[i] > 0.0;
  1237. out[i / 8] |= (res << (i % 8));
  1238. }
  1239. break;
  1240. }
  1241. case SQLITE_VEC_ELEMENT_TYPE_INT8: {
  1242. for (size_t i = 0; i < dimensions; i++) {
  1243. int res = ((i8 *)vector)[i] > 0;
  1244. out[i / 8] |= (res << (i % 8));
  1245. }
  1246. break;
  1247. }
  1248. case SQLITE_VEC_ELEMENT_TYPE_BIT: {
  1249. sqlite3_result_error(context,
  1250. "Can only binary quantize float or int8 vectors", -1);
  1251. sqlite3_free(out);
  1252. return;
  1253. }
  1254. }
  1255. sqlite3_result_blob(context, out, sz, sqlite3_free);
  1256. sqlite3_result_subtype(context, SQLITE_VEC_ELEMENT_TYPE_BIT);
  1257. cleanup:
  1258. vectorCleanup(vector);
  1259. }
  1260. static void vec_quantize_int8(sqlite3_context *context, int argc,
  1261. sqlite3_value **argv) {
  1262. assert(argc == 2);
  1263. f32 *srcVector;
  1264. size_t dimensions;
  1265. fvec_cleanup srcCleanup;
  1266. char *err;
  1267. i8 *out = NULL;
  1268. int rc = fvec_from_value(argv[0], &srcVector, &dimensions, &srcCleanup, &err);
  1269. if (rc != SQLITE_OK) {
  1270. sqlite3_result_error(context, err, -1);
  1271. sqlite3_free(err);
  1272. return;
  1273. }
  1274. int sz = dimensions * sizeof(i8);
  1275. out = sqlite3_malloc(sz);
  1276. if (!out) {
  1277. sqlite3_result_error_nomem(context);
  1278. goto cleanup;
  1279. }
  1280. memset(out, 0, sz);
  1281. if ((sqlite3_value_type(argv[1]) != SQLITE_TEXT) ||
  1282. (sqlite3_value_bytes(argv[1]) != strlen("unit")) ||
  1283. (sqlite3_stricmp((const char *)sqlite3_value_text(argv[1]), "unit") !=
  1284. 0)) {
  1285. sqlite3_result_error(
  1286. context, "2nd argument to vec_quantize_int8() must be 'unit'.", -1);
  1287. sqlite3_free(out);
  1288. goto cleanup;
  1289. }
  1290. f32 step = (1.0 - (-1.0)) / 255;
  1291. for (size_t i = 0; i < dimensions; i++) {
  1292. out[i] = ((srcVector[i] - (-1.0)) / step) - 128;
  1293. }
  1294. sqlite3_result_blob(context, out, dimensions * sizeof(i8), sqlite3_free);
  1295. sqlite3_result_subtype(context, SQLITE_VEC_ELEMENT_TYPE_INT8);
  1296. cleanup:
  1297. srcCleanup(srcVector);
  1298. }
  1299. static void vec_add(sqlite3_context *context, int argc, sqlite3_value **argv) {
  1300. assert(argc == 2);
  1301. int rc;
  1302. void *a = NULL, *b = NULL;
  1303. size_t dimensions;
  1304. vector_cleanup aCleanup, bCleanup;
  1305. char *error;
  1306. enum VectorElementType elementType;
  1307. rc = ensure_vector_match(argv[0], argv[1], &a, &b, &elementType, &dimensions,
  1308. &aCleanup, &bCleanup, &error);
  1309. if (rc != SQLITE_OK) {
  1310. sqlite3_result_error(context, error, -1);
  1311. sqlite3_free(error);
  1312. return;
  1313. }
  1314. switch (elementType) {
  1315. case SQLITE_VEC_ELEMENT_TYPE_BIT: {
  1316. sqlite3_result_error(context, "Cannot add two bitvectors together.", -1);
  1317. goto finish;
  1318. }
  1319. case SQLITE_VEC_ELEMENT_TYPE_FLOAT32: {
  1320. size_t outSize = dimensions * sizeof(f32);
  1321. f32 *out = sqlite3_malloc(outSize);
  1322. if (!out) {
  1323. sqlite3_result_error_nomem(context);
  1324. goto finish;
  1325. }
  1326. memset(out, 0, outSize);
  1327. for (size_t i = 0; i < dimensions; i++) {
  1328. out[i] = ((f32 *)a)[i] + ((f32 *)b)[i];
  1329. }
  1330. sqlite3_result_blob(context, out, outSize, sqlite3_free);
  1331. sqlite3_result_subtype(context, SQLITE_VEC_ELEMENT_TYPE_FLOAT32);
  1332. goto finish;
  1333. }
  1334. case SQLITE_VEC_ELEMENT_TYPE_INT8: {
  1335. size_t outSize = dimensions * sizeof(i8);
  1336. i8 *out = sqlite3_malloc(outSize);
  1337. if (!out) {
  1338. sqlite3_result_error_nomem(context);
  1339. goto finish;
  1340. }
  1341. memset(out, 0, outSize);
  1342. for (size_t i = 0; i < dimensions; i++) {
  1343. out[i] = ((i8 *)a)[i] + ((i8 *)b)[i];
  1344. }
  1345. sqlite3_result_blob(context, out, outSize, sqlite3_free);
  1346. sqlite3_result_subtype(context, SQLITE_VEC_ELEMENT_TYPE_INT8);
  1347. goto finish;
  1348. }
  1349. }
  1350. finish:
  1351. aCleanup(a);
  1352. bCleanup(b);
  1353. return;
  1354. }
  1355. static void vec_sub(sqlite3_context *context, int argc, sqlite3_value **argv) {
  1356. assert(argc == 2);
  1357. int rc;
  1358. void *a = NULL, *b = NULL;
  1359. size_t dimensions;
  1360. vector_cleanup aCleanup, bCleanup;
  1361. char *error;
  1362. enum VectorElementType elementType;
  1363. rc = ensure_vector_match(argv[0], argv[1], &a, &b, &elementType, &dimensions,
  1364. &aCleanup, &bCleanup, &error);
  1365. if (rc != SQLITE_OK) {
  1366. sqlite3_result_error(context, error, -1);
  1367. sqlite3_free(error);
  1368. return;
  1369. }
  1370. switch (elementType) {
  1371. case SQLITE_VEC_ELEMENT_TYPE_BIT: {
  1372. sqlite3_result_error(context, "Cannot subtract two bitvectors together.",
  1373. -1);
  1374. goto finish;
  1375. }
  1376. case SQLITE_VEC_ELEMENT_TYPE_FLOAT32: {
  1377. size_t outSize = dimensions * sizeof(f32);
  1378. f32 *out = sqlite3_malloc(outSize);
  1379. if (!out) {
  1380. sqlite3_result_error_nomem(context);
  1381. goto finish;
  1382. }
  1383. memset(out, 0, outSize);
  1384. for (size_t i = 0; i < dimensions; i++) {
  1385. out[i] = ((f32 *)a)[i] - ((f32 *)b)[i];
  1386. }
  1387. sqlite3_result_blob(context, out, outSize, sqlite3_free);
  1388. sqlite3_result_subtype(context, SQLITE_VEC_ELEMENT_TYPE_FLOAT32);
  1389. goto finish;
  1390. }
  1391. case SQLITE_VEC_ELEMENT_TYPE_INT8: {
  1392. size_t outSize = dimensions * sizeof(i8);
  1393. i8 *out = sqlite3_malloc(outSize);
  1394. if (!out) {
  1395. sqlite3_result_error_nomem(context);
  1396. goto finish;
  1397. }
  1398. memset(out, 0, outSize);
  1399. for (size_t i = 0; i < dimensions; i++) {
  1400. out[i] = ((i8 *)a)[i] - ((i8 *)b)[i];
  1401. }
  1402. sqlite3_result_blob(context, out, outSize, sqlite3_free);
  1403. sqlite3_result_subtype(context, SQLITE_VEC_ELEMENT_TYPE_INT8);
  1404. goto finish;
  1405. }
  1406. }
  1407. finish:
  1408. aCleanup(a);
  1409. bCleanup(b);
  1410. return;
  1411. }
  1412. static void vec_slice(sqlite3_context *context, int argc,
  1413. sqlite3_value **argv) {
  1414. assert(argc == 3);
  1415. void *vector;
  1416. size_t dimensions;
  1417. vector_cleanup cleanup;
  1418. char *err;
  1419. enum VectorElementType elementType;
  1420. int rc = vector_from_value(argv[0], &vector, &dimensions, &elementType,
  1421. &cleanup, &err);
  1422. if (rc != SQLITE_OK) {
  1423. sqlite3_result_error(context, err, -1);
  1424. sqlite3_free(err);
  1425. return;
  1426. }
  1427. int start = sqlite3_value_int(argv[1]);
  1428. int end = sqlite3_value_int(argv[2]);
  1429. if (start < 0) {
  1430. sqlite3_result_error(context,
  1431. "slice 'start' index must be a postive number.", -1);
  1432. goto done;
  1433. }
  1434. if (end < 0) {
  1435. sqlite3_result_error(context, "slice 'end' index must be a postive number.",
  1436. -1);
  1437. goto done;
  1438. }
  1439. if (((size_t)start) > dimensions) {
  1440. sqlite3_result_error(
  1441. context, "slice 'start' index is greater than the number of dimensions",
  1442. -1);
  1443. goto done;
  1444. }
  1445. if (((size_t)end) > dimensions) {
  1446. sqlite3_result_error(
  1447. context, "slice 'end' index is greater than the number of dimensions",
  1448. -1);
  1449. goto done;
  1450. }
  1451. if (start > end) {
  1452. sqlite3_result_error(context,
  1453. "slice 'start' index is greater than 'end' index", -1);
  1454. goto done;
  1455. }
  1456. if (start == end) {
  1457. sqlite3_result_error(context,
  1458. "slice 'start' index is equal to the 'end' index, "
  1459. "vectors must have non-zero length",
  1460. -1);
  1461. goto done;
  1462. }
  1463. size_t n = end - start;
  1464. switch (elementType) {
  1465. case SQLITE_VEC_ELEMENT_TYPE_FLOAT32: {
  1466. int outSize = n * sizeof(f32);
  1467. f32 *out = sqlite3_malloc(outSize);
  1468. if (!out) {
  1469. sqlite3_result_error_nomem(context);
  1470. goto done;
  1471. }
  1472. memset(out, 0, outSize);
  1473. for (size_t i = 0; i < n; i++) {
  1474. out[i] = ((f32 *)vector)[start + i];
  1475. }
  1476. sqlite3_result_blob(context, out, outSize, sqlite3_free);
  1477. sqlite3_result_subtype(context, SQLITE_VEC_ELEMENT_TYPE_FLOAT32);
  1478. goto done;
  1479. }
  1480. case SQLITE_VEC_ELEMENT_TYPE_INT8: {
  1481. int outSize = n * sizeof(i8);
  1482. i8 *out = sqlite3_malloc(outSize);
  1483. if (!out) {
  1484. sqlite3_result_error_nomem(context);
  1485. return;
  1486. }
  1487. memset(out, 0, outSize);
  1488. for (size_t i = 0; i < n; i++) {
  1489. out[i] = ((i8 *)vector)[start + i];
  1490. }
  1491. sqlite3_result_blob(context, out, outSize, sqlite3_free);
  1492. sqlite3_result_subtype(context, SQLITE_VEC_ELEMENT_TYPE_INT8);
  1493. goto done;
  1494. }
  1495. case SQLITE_VEC_ELEMENT_TYPE_BIT: {
  1496. if ((start % CHAR_BIT) != 0) {
  1497. sqlite3_result_error(context, "start index must be divisible by 8.", -1);
  1498. goto done;
  1499. }
  1500. if ((end % CHAR_BIT) != 0) {
  1501. sqlite3_result_error(context, "end index must be divisible by 8.", -1);
  1502. goto done;
  1503. }
  1504. int outSize = n / CHAR_BIT;
  1505. u8 *out = sqlite3_malloc(outSize);
  1506. if (!out) {
  1507. sqlite3_result_error_nomem(context);
  1508. return;
  1509. }
  1510. memset(out, 0, outSize);
  1511. for (size_t i = 0; i < n / CHAR_BIT; i++) {
  1512. out[i] = ((u8 *)vector)[(start / CHAR_BIT) + i];
  1513. }
  1514. sqlite3_result_blob(context, out, outSize, sqlite3_free);
  1515. sqlite3_result_subtype(context, SQLITE_VEC_ELEMENT_TYPE_BIT);
  1516. goto done;
  1517. }
  1518. }
  1519. done:
  1520. cleanup(vector);
  1521. }
  1522. static void vec_to_json(sqlite3_context *context, int argc,
  1523. sqlite3_value **argv) {
  1524. assert(argc == 1);
  1525. void *vector;
  1526. size_t dimensions;
  1527. vector_cleanup cleanup;
  1528. char *err;
  1529. enum VectorElementType elementType;
  1530. int rc = vector_from_value(argv[0], &vector, &dimensions, &elementType,
  1531. &cleanup, &err);
  1532. if (rc != SQLITE_OK) {
  1533. sqlite3_result_error(context, err, -1);
  1534. sqlite3_free(err);
  1535. return;
  1536. }
  1537. sqlite3_str *str = sqlite3_str_new(sqlite3_context_db_handle(context));
  1538. sqlite3_str_appendall(str, "[");
  1539. for (size_t i = 0; i < dimensions; i++) {
  1540. if (i != 0) {
  1541. sqlite3_str_appendall(str, ",");
  1542. }
  1543. if (elementType == SQLITE_VEC_ELEMENT_TYPE_FLOAT32) {
  1544. f32 value = ((f32 *)vector)[i];
  1545. if (isnan(value)) {
  1546. sqlite3_str_appendall(str, "null");
  1547. } else {
  1548. sqlite3_str_appendf(str, "%f", value);
  1549. }
  1550. } else if (elementType == SQLITE_VEC_ELEMENT_TYPE_INT8) {
  1551. sqlite3_str_appendf(str, "%d", ((i8 *)vector)[i]);
  1552. } else if (elementType == SQLITE_VEC_ELEMENT_TYPE_BIT) {
  1553. u8 b = (((u8 *)vector)[i / 8] >> (i % CHAR_BIT)) & 1;
  1554. sqlite3_str_appendf(str, "%d", b);
  1555. }
  1556. }
  1557. sqlite3_str_appendall(str, "]");
  1558. int len = sqlite3_str_length(str);
  1559. char *s = sqlite3_str_finish(str);
  1560. if (s) {
  1561. sqlite3_result_text(context, s, len, sqlite3_free);
  1562. sqlite3_result_subtype(context, JSON_SUBTYPE);
  1563. } else {
  1564. sqlite3_result_error_nomem(context);
  1565. }
  1566. cleanup(vector);
  1567. }
  1568. static void vec_normalize(sqlite3_context *context, int argc,
  1569. sqlite3_value **argv) {
  1570. assert(argc == 1);
  1571. void *vector;
  1572. size_t dimensions;
  1573. vector_cleanup cleanup;
  1574. char *err;
  1575. enum VectorElementType elementType;
  1576. int rc = vector_from_value(argv[0], &vector, &dimensions, &elementType,
  1577. &cleanup, &err);
  1578. if (rc != SQLITE_OK) {
  1579. sqlite3_result_error(context, err, -1);
  1580. sqlite3_free(err);
  1581. return;
  1582. }
  1583. if (elementType != SQLITE_VEC_ELEMENT_TYPE_FLOAT32) {
  1584. sqlite3_result_error(
  1585. context, "only float32 vectors are supported when normalizing", -1);
  1586. cleanup(vector);
  1587. return;
  1588. }
  1589. int outSize = dimensions * sizeof(f32);
  1590. f32 *out = sqlite3_malloc(outSize);
  1591. if (!out) {
  1592. cleanup(vector);
  1593. sqlite3_result_error_code(context, SQLITE_NOMEM);
  1594. return;
  1595. }
  1596. memset(out, 0, outSize);
  1597. f32 *v = (f32 *)vector;
  1598. f32 norm = 0;
  1599. for (size_t i = 0; i < dimensions; i++) {
  1600. norm += v[i] * v[i];
  1601. }
  1602. norm = sqrt(norm);
  1603. for (size_t i = 0; i < dimensions; i++) {
  1604. out[i] = v[i] / norm;
  1605. }
  1606. sqlite3_result_blob(context, out, dimensions * sizeof(f32), sqlite3_free);
  1607. sqlite3_result_subtype(context, SQLITE_VEC_ELEMENT_TYPE_FLOAT32);
  1608. cleanup(vector);
  1609. }
  1610. static void _static_text_func(sqlite3_context *context, int argc,
  1611. sqlite3_value **argv) {
  1612. UNUSED_PARAMETER(argc);
  1613. UNUSED_PARAMETER(argv);
  1614. sqlite3_result_text(context, sqlite3_user_data(context), -1, SQLITE_STATIC);
  1615. }
  1616. #pragma endregion
  1617. enum Vec0TokenType {
  1618. TOKEN_TYPE_IDENTIFIER,
  1619. TOKEN_TYPE_DIGIT,
  1620. TOKEN_TYPE_LBRACKET,
  1621. TOKEN_TYPE_RBRACKET,
  1622. TOKEN_TYPE_PLUS,
  1623. TOKEN_TYPE_EQ,
  1624. };
  1625. struct Vec0Token {
  1626. enum Vec0TokenType token_type;
  1627. char *start;
  1628. char *end;
  1629. };
  1630. int is_alpha(char x) {
  1631. return (x >= 'a' && x <= 'z') || (x >= 'A' && x <= 'Z');
  1632. }
  1633. int is_digit(char x) { return (x >= '0' && x <= '9'); }
  1634. int is_whitespace(char x) {
  1635. return x == ' ' || x == '\t' || x == '\n' || x == '\r';
  1636. }
  1637. #define VEC0_TOKEN_RESULT_EOF 1
  1638. #define VEC0_TOKEN_RESULT_SOME 2
  1639. #define VEC0_TOKEN_RESULT_ERROR 3
  1640. int vec0_token_next(char *start, char *end, struct Vec0Token *out) {
  1641. char *ptr = start;
  1642. while (ptr < end) {
  1643. char curr = *ptr;
  1644. if (is_whitespace(curr)) {
  1645. ptr++;
  1646. continue;
  1647. } else if (curr == '+') {
  1648. ptr++;
  1649. out->start = ptr;
  1650. out->end = ptr;
  1651. out->token_type = TOKEN_TYPE_PLUS;
  1652. return VEC0_TOKEN_RESULT_SOME;
  1653. } else if (curr == '[') {
  1654. ptr++;
  1655. out->start = ptr;
  1656. out->end = ptr;
  1657. out->token_type = TOKEN_TYPE_LBRACKET;
  1658. return VEC0_TOKEN_RESULT_SOME;
  1659. } else if (curr == ']') {
  1660. ptr++;
  1661. out->start = ptr;
  1662. out->end = ptr;
  1663. out->token_type = TOKEN_TYPE_RBRACKET;
  1664. return VEC0_TOKEN_RESULT_SOME;
  1665. } else if (curr == '=') {
  1666. ptr++;
  1667. out->start = ptr;
  1668. out->end = ptr;
  1669. out->token_type = TOKEN_TYPE_EQ;
  1670. return VEC0_TOKEN_RESULT_SOME;
  1671. } else if (is_alpha(curr)) {
  1672. char *start = ptr;
  1673. while (ptr < end && (is_alpha(*ptr) || is_digit(*ptr) || *ptr == '_')) {
  1674. ptr++;
  1675. }
  1676. out->start = start;
  1677. out->end = ptr;
  1678. out->token_type = TOKEN_TYPE_IDENTIFIER;
  1679. return VEC0_TOKEN_RESULT_SOME;
  1680. } else if (is_digit(curr)) {
  1681. char *start = ptr;
  1682. while (ptr < end && (is_digit(*ptr))) {
  1683. ptr++;
  1684. }
  1685. out->start = start;
  1686. out->end = ptr;
  1687. out->token_type = TOKEN_TYPE_DIGIT;
  1688. return VEC0_TOKEN_RESULT_SOME;
  1689. } else {
  1690. return VEC0_TOKEN_RESULT_ERROR;
  1691. }
  1692. }
  1693. return VEC0_TOKEN_RESULT_EOF;
  1694. }
  1695. struct Vec0Scanner {
  1696. char *start;
  1697. char *end;
  1698. char *ptr;
  1699. };
  1700. void vec0_scanner_init(struct Vec0Scanner *scanner, const char *source,
  1701. int source_length) {
  1702. scanner->start = (char *)source;
  1703. scanner->end = (char *)source + source_length;
  1704. scanner->ptr = (char *)source;
  1705. }
  1706. int vec0_scanner_next(struct Vec0Scanner *scanner, struct Vec0Token *out) {
  1707. int rc = vec0_token_next(scanner->start, scanner->end, out);
  1708. if (rc == VEC0_TOKEN_RESULT_SOME) {
  1709. scanner->start = out->end;
  1710. }
  1711. return rc;
  1712. }
  1713. int vec0_parse_table_option(const char *source, int source_length,
  1714. char **out_key, int *out_key_length,
  1715. char **out_value, int *out_value_length) {
  1716. int rc;
  1717. struct Vec0Scanner scanner;
  1718. struct Vec0Token token;
  1719. char *key;
  1720. char *value;
  1721. int keyLength, valueLength;
  1722. vec0_scanner_init(&scanner, source, source_length);
  1723. rc = vec0_scanner_next(&scanner, &token);
  1724. if (rc != VEC0_TOKEN_RESULT_SOME &&
  1725. token.token_type != TOKEN_TYPE_IDENTIFIER) {
  1726. return SQLITE_EMPTY;
  1727. }
  1728. key = token.start;
  1729. keyLength = token.end - token.start;
  1730. rc = vec0_scanner_next(&scanner, &token);
  1731. if (rc != VEC0_TOKEN_RESULT_SOME && token.token_type != TOKEN_TYPE_EQ) {
  1732. return SQLITE_EMPTY;
  1733. }
  1734. rc = vec0_scanner_next(&scanner, &token);
  1735. if (rc != VEC0_TOKEN_RESULT_SOME &&
  1736. !((token.token_type == TOKEN_TYPE_IDENTIFIER) ||
  1737. (token.token_type == TOKEN_TYPE_DIGIT))) {
  1738. return SQLITE_ERROR;
  1739. }
  1740. value = token.start;
  1741. valueLength = token.end - token.start;
  1742. rc = vec0_scanner_next(&scanner, &token);
  1743. if (rc == VEC0_TOKEN_RESULT_EOF) {
  1744. *out_key = key;
  1745. *out_key_length = keyLength;
  1746. *out_value = value;
  1747. *out_value_length = valueLength;
  1748. return SQLITE_OK;
  1749. }
  1750. return SQLITE_ERROR;
  1751. }
  1752. /**
  1753. * @brief Parse an argv[i] entry of a vec0 virtual table definition, and see if
  1754. * it's a PARTITION KEY definition.
  1755. *
  1756. * @param source: argv[i] source string
  1757. * @param source_length: length of the source string
  1758. * @param out_column_name: If it is a partition key, the output column name. Same lifetime
  1759. * as source, points to specific char *
  1760. * @param out_column_name_length: Length of out_column_name in bytes
  1761. * @param out_column_type: SQLITE_TEXT or SQLITE_INTEGER.
  1762. * @return int: SQLITE_EMPTY if not a PK, SQLITE_OK if it is.
  1763. */
  1764. int vec0_parse_partition_key_definition(const char *source, int source_length,
  1765. char **out_column_name,
  1766. int *out_column_name_length,
  1767. int *out_column_type) {
  1768. struct Vec0Scanner scanner;
  1769. struct Vec0Token token;
  1770. char *column_name;
  1771. int column_name_length;
  1772. int column_type;
  1773. vec0_scanner_init(&scanner, source, source_length);
  1774. // Check first token is identifier, will be the column name
  1775. int rc = vec0_scanner_next(&scanner, &token);
  1776. if (rc != VEC0_TOKEN_RESULT_SOME &&
  1777. token.token_type != TOKEN_TYPE_IDENTIFIER) {
  1778. return SQLITE_EMPTY;
  1779. }
  1780. column_name = token.start;
  1781. column_name_length = token.end - token.start;
  1782. // Check the next token matches "text" or "integer", as column type
  1783. rc = vec0_scanner_next(&scanner, &token);
  1784. if (rc != VEC0_TOKEN_RESULT_SOME &&
  1785. token.token_type != TOKEN_TYPE_IDENTIFIER) {
  1786. return SQLITE_EMPTY;
  1787. }
  1788. if (sqlite3_strnicmp(token.start, "text", token.end - token.start) == 0) {
  1789. column_type = SQLITE_TEXT;
  1790. } else if (sqlite3_strnicmp(token.start, "int", token.end - token.start) ==
  1791. 0 ||
  1792. sqlite3_strnicmp(token.start, "integer",
  1793. token.end - token.start) == 0) {
  1794. column_type = SQLITE_INTEGER;
  1795. } else {
  1796. return SQLITE_EMPTY;
  1797. }
  1798. // Check the next token is identifier and matches "partition"
  1799. rc = vec0_scanner_next(&scanner, &token);
  1800. if (rc != VEC0_TOKEN_RESULT_SOME &&
  1801. token.token_type != TOKEN_TYPE_IDENTIFIER) {
  1802. return SQLITE_EMPTY;
  1803. }
  1804. if (sqlite3_strnicmp(token.start, "partition", token.end - token.start) != 0) {
  1805. return SQLITE_EMPTY;
  1806. }
  1807. // Check the next token is identifier and matches "key"
  1808. rc = vec0_scanner_next(&scanner, &token);
  1809. if (rc != VEC0_TOKEN_RESULT_SOME &&
  1810. token.token_type != TOKEN_TYPE_IDENTIFIER) {
  1811. return SQLITE_EMPTY;
  1812. }
  1813. if (sqlite3_strnicmp(token.start, "key", token.end - token.start) != 0) {
  1814. return SQLITE_EMPTY;
  1815. }
  1816. *out_column_name = column_name;
  1817. *out_column_name_length = column_name_length;
  1818. *out_column_type = column_type;
  1819. return SQLITE_OK;
  1820. }
  1821. /**
  1822. * @brief Parse an argv[i] entry of a vec0 virtual table definition, and see if
  1823. * it's an auxiliar column definition, ie `+[name] [type]` like `+contents text`
  1824. *
  1825. * @param source: argv[i] source string
  1826. * @param source_length: length of the source string
  1827. * @param out_column_name: If it is a partition key, the output column name. Same lifetime
  1828. * as source, points to specific char *
  1829. * @param out_column_name_length: Length of out_column_name in bytes
  1830. * @param out_column_type: SQLITE_TEXT, SQLITE_INTEGER, SQLITE_FLOAT, or SQLITE_BLOB.
  1831. * @return int: SQLITE_EMPTY if not an aux column, SQLITE_OK if it is.
  1832. */
  1833. int vec0_parse_auxiliary_column_definition(const char *source, int source_length,
  1834. char **out_column_name,
  1835. int *out_column_name_length,
  1836. int *out_column_type) {
  1837. struct Vec0Scanner scanner;
  1838. struct Vec0Token token;
  1839. char *column_name;
  1840. int column_name_length;
  1841. int column_type;
  1842. vec0_scanner_init(&scanner, source, source_length);
  1843. // Check first token is '+', which denotes aux columns
  1844. int rc = vec0_scanner_next(&scanner, &token);
  1845. if (rc != VEC0_TOKEN_RESULT_SOME ||
  1846. token.token_type != TOKEN_TYPE_PLUS) {
  1847. return SQLITE_EMPTY;
  1848. }
  1849. rc = vec0_scanner_next(&scanner, &token);
  1850. if (rc != VEC0_TOKEN_RESULT_SOME &&
  1851. token.token_type != TOKEN_TYPE_IDENTIFIER) {
  1852. return SQLITE_EMPTY;
  1853. }
  1854. column_name = token.start;
  1855. column_name_length = token.end - token.start;
  1856. // Check the next token matches "text" or "integer", as column type
  1857. rc = vec0_scanner_next(&scanner, &token);
  1858. if (rc != VEC0_TOKEN_RESULT_SOME &&
  1859. token.token_type != TOKEN_TYPE_IDENTIFIER) {
  1860. return SQLITE_EMPTY;
  1861. }
  1862. if (sqlite3_strnicmp(token.start, "text", token.end - token.start) == 0) {
  1863. column_type = SQLITE_TEXT;
  1864. } else if (sqlite3_strnicmp(token.start, "int", token.end - token.start) ==
  1865. 0 ||
  1866. sqlite3_strnicmp(token.start, "integer",
  1867. token.end - token.start) == 0) {
  1868. column_type = SQLITE_INTEGER;
  1869. } else if (sqlite3_strnicmp(token.start, "float", token.end - token.start) ==
  1870. 0 ||
  1871. sqlite3_strnicmp(token.start, "double",
  1872. token.end - token.start) == 0) {
  1873. column_type = SQLITE_FLOAT;
  1874. } else if (sqlite3_strnicmp(token.start, "blob", token.end - token.start) ==0) {
  1875. column_type = SQLITE_BLOB;
  1876. } else {
  1877. return SQLITE_EMPTY;
  1878. }
  1879. *out_column_name = column_name;
  1880. *out_column_name_length = column_name_length;
  1881. *out_column_type = column_type;
  1882. return SQLITE_OK;
  1883. }
  /**
   * Value kinds a vec0 metadata column can be declared with, as recognized by
   * vec0_parse_metadata_column_definition().
   */
  typedef enum {
    VEC0_METADATA_COLUMN_KIND_BOOLEAN = 0,
    VEC0_METADATA_COLUMN_KIND_INTEGER = 1,
    VEC0_METADATA_COLUMN_KIND_FLOAT = 2,
    VEC0_METADATA_COLUMN_KIND_TEXT = 3,
    // future: blob, date, datetime
  } vec0_metadata_column_kind;
  1891. /**
  1892. * @brief Parse an argv[i] entry of a vec0 virtual table definition, and see if
  1893. * it's an metadata column definition, ie `[name] [type]` like `is_released boolean`
  1894. *
  1895. * @param source: argv[i] source string
  1896. * @param source_length: length of the source string
  1897. * @param out_column_name: If it is a metadata column, the output column name. Same lifetime
  1898. * as source, points to specific char *
  1899. * @param out_column_name_length: Length of out_column_name in bytes
  1900. * @param out_column_type: one of vec0_metadata_column_kind
  1901. * @return int: SQLITE_EMPTY if not an metadata column, SQLITE_OK if it is.
  1902. */
  1903. int vec0_parse_metadata_column_definition(const char *source, int source_length,
  1904. char **out_column_name,
  1905. int *out_column_name_length,
  1906. vec0_metadata_column_kind *out_column_type) {
  1907. struct Vec0Scanner scanner;
  1908. struct Vec0Token token;
  1909. char *column_name;
  1910. int column_name_length;
  1911. vec0_metadata_column_kind column_type;
  1912. int rc;
  1913. vec0_scanner_init(&scanner, source, source_length);
  1914. rc = vec0_scanner_next(&scanner, &token);
  1915. if (rc != VEC0_TOKEN_RESULT_SOME ||
  1916. token.token_type != TOKEN_TYPE_IDENTIFIER) {
  1917. return SQLITE_EMPTY;
  1918. }
  1919. column_name = token.start;
  1920. column_name_length = token.end - token.start;
  1921. // Check the next token matches a valid metadata type
  1922. rc = vec0_scanner_next(&scanner, &token);
  1923. if (rc != VEC0_TOKEN_RESULT_SOME ||
  1924. token.token_type != TOKEN_TYPE_IDENTIFIER) {
  1925. return SQLITE_EMPTY;
  1926. }
  1927. char * t = token.start;
  1928. int n = token.end - token.start;
  1929. if (sqlite3_strnicmp(t, "boolean", n) == 0 || sqlite3_strnicmp(t, "bool", n) == 0) {
  1930. column_type = VEC0_METADATA_COLUMN_KIND_BOOLEAN;
  1931. }else if (sqlite3_strnicmp(t, "int64", n) == 0 || sqlite3_strnicmp(t, "integer64", n) == 0 || sqlite3_strnicmp(t, "integer", n) == 0 || sqlite3_strnicmp(t, "int", n) == 0) {
  1932. column_type = VEC0_METADATA_COLUMN_KIND_INTEGER;
  1933. }else if (sqlite3_strnicmp(t, "float", n) == 0 || sqlite3_strnicmp(t, "double", n) == 0 || sqlite3_strnicmp(t, "float64", n) == 0 || sqlite3_strnicmp(t, "f64", n) == 0) {
  1934. column_type = VEC0_METADATA_COLUMN_KIND_FLOAT;
  1935. } else if (sqlite3_strnicmp(t, "text", n) == 0) {
  1936. column_type = VEC0_METADATA_COLUMN_KIND_TEXT;
  1937. } else {
  1938. return SQLITE_EMPTY;
  1939. }
  1940. *out_column_name = column_name;
  1941. *out_column_name_length = column_name_length;
  1942. *out_column_type = column_type;
  1943. return SQLITE_OK;
  1944. }
  1945. /**
  1946. * @brief Parse an argv[i] entry of a vec0 virtual table definition, and see if
  1947. * it's a PRIMARY KEY definition.
  1948. *
  1949. * @param source: argv[i] source string
  1950. * @param source_length: length of the source string
  1951. * @param out_column_name: If it is a PK, the output column name. Same lifetime
  1952. * as source, points to specific char *
  1953. * @param out_column_name_length: Length of out_column_name in bytes
  1954. * @param out_column_type: SQLITE_TEXT or SQLITE_INTEGER.
  1955. * @return int: SQLITE_EMPTY if not a PK, SQLITE_OK if it is.
  1956. */
  1957. int vec0_parse_primary_key_definition(const char *source, int source_length,
  1958. char **out_column_name,
  1959. int *out_column_name_length,
  1960. int *out_column_type) {
  1961. struct Vec0Scanner scanner;
  1962. struct Vec0Token token;
  1963. char *column_name;
  1964. int column_name_length;
  1965. int column_type;
  1966. vec0_scanner_init(&scanner, source, source_length);
  1967. // Check first token is identifier, will be the column name
  1968. int rc = vec0_scanner_next(&scanner, &token);
  1969. if (rc != VEC0_TOKEN_RESULT_SOME &&
  1970. token.token_type != TOKEN_TYPE_IDENTIFIER) {
  1971. return SQLITE_EMPTY;
  1972. }
  1973. column_name = token.start;
  1974. column_name_length = token.end - token.start;
  1975. // Check the next token matches "text" or "integer", as column type
  1976. rc = vec0_scanner_next(&scanner, &token);
  1977. if (rc != VEC0_TOKEN_RESULT_SOME &&
  1978. token.token_type != TOKEN_TYPE_IDENTIFIER) {
  1979. return SQLITE_EMPTY;
  1980. }
  1981. if (sqlite3_strnicmp(token.start, "text", token.end - token.start) == 0) {
  1982. column_type = SQLITE_TEXT;
  1983. } else if (sqlite3_strnicmp(token.start, "int", token.end - token.start) ==
  1984. 0 ||
  1985. sqlite3_strnicmp(token.start, "integer",
  1986. token.end - token.start) == 0) {
  1987. column_type = SQLITE_INTEGER;
  1988. } else {
  1989. return SQLITE_EMPTY;
  1990. }
  1991. // Check the next token is identifier and matches "primary"
  1992. rc = vec0_scanner_next(&scanner, &token);
  1993. if (rc != VEC0_TOKEN_RESULT_SOME &&
  1994. token.token_type != TOKEN_TYPE_IDENTIFIER) {
  1995. return SQLITE_EMPTY;
  1996. }
  1997. if (sqlite3_strnicmp(token.start, "primary", token.end - token.start) != 0) {
  1998. return SQLITE_EMPTY;
  1999. }
  2000. // Check the next token is identifier and matches "key"
  2001. rc = vec0_scanner_next(&scanner, &token);
  2002. if (rc != VEC0_TOKEN_RESULT_SOME &&
  2003. token.token_type != TOKEN_TYPE_IDENTIFIER) {
  2004. return SQLITE_EMPTY;
  2005. }
  2006. if (sqlite3_strnicmp(token.start, "key", token.end - token.start) != 0) {
  2007. return SQLITE_EMPTY;
  2008. }
  2009. *out_column_name = column_name;
  2010. *out_column_name_length = column_name_length;
  2011. *out_column_type = column_type;
  2012. return SQLITE_OK;
  2013. }
  /**
   * Distance metrics a vec0 vector column can be declared with through the
   * `distance_metric=...` column option. vec0_parse_vector_column() uses
   * VEC0_DISTANCE_METRIC_L2 when the option is absent.
   */
  enum Vec0DistanceMetrics {
    /* selected by `distance_metric=l2` */
    VEC0_DISTANCE_METRIC_L2 = 1,
    /* selected by `distance_metric=cosine` */
    VEC0_DISTANCE_METRIC_COSINE = 2,
    /* selected by `distance_metric=l1` */
    VEC0_DISTANCE_METRIC_L1 = 3,
  };
  2019. struct VectorColumnDefinition {
  2020. char *name;
  2021. int name_length;
  2022. size_t dimensions;
  2023. enum VectorElementType element_type;
  2024. enum Vec0DistanceMetrics distance_metric;
  2025. };
  /**
   * Definition of a vec0 partition key column.
   */
  struct Vec0PartitionColumnDefinition {
    /* Column type code. NOTE(review): presumably the SQLITE_TEXT/SQLITE_INTEGER
     * value reported by vec0_parse_partition_key_definition(); confirm against
     * the code that fills this struct. */
    int type;
    /* Column name. */
    char *name;
    /* Length of name in bytes. */
    int name_length;
  };
  /**
   * Definition of a vec0 auxiliary (`+name type`) column.
   */
  struct Vec0AuxiliaryColumnDefinition {
    /* Column type code. NOTE(review): presumably the SQLITE_* value reported by
     * vec0_parse_auxiliary_column_definition(); confirm against the code that
     * fills this struct. */
    int type;
    /* Column name. */
    char *name;
    /* Length of name in bytes. */
    int name_length;
  };
  2036. struct Vec0MetadataColumnDefinition {
  2037. vec0_metadata_column_kind kind;
  2038. char * name;
  2039. int name_length;
  2040. };
  2041. size_t vector_byte_size(enum VectorElementType element_type,
  2042. size_t dimensions) {
  2043. switch (element_type) {
  2044. case SQLITE_VEC_ELEMENT_TYPE_FLOAT32:
  2045. return dimensions * sizeof(f32);
  2046. case SQLITE_VEC_ELEMENT_TYPE_INT8:
  2047. return dimensions * sizeof(i8);
  2048. case SQLITE_VEC_ELEMENT_TYPE_BIT:
  2049. return dimensions / CHAR_BIT;
  2050. }
  2051. return 0;
  2052. }
  2053. size_t vector_column_byte_size(struct VectorColumnDefinition column) {
  2054. return vector_byte_size(column.element_type, column.dimensions);
  2055. }
  2056. /**
  2057. * @brief Parse an vec0 vtab argv[i] column definition and see if
  2058. * it's a vector column defintion, ex `contents_embedding float[768]`.
  2059. *
  2060. * @param source vec0 argv[i] item
  2061. * @param source_length length of source in bytes
  2062. * @param outColumn Output the parse vector column to this struct, if success
  2063. * @return int SQLITE_OK on success, SQLITE_EMPTY is it's not a vector column
  2064. * definition, SQLITE_ERROR on error.
  2065. */
  2066. int vec0_parse_vector_column(const char *source, int source_length,
  2067. struct VectorColumnDefinition *outColumn) {
  2068. // parses a vector column definition like so:
  2069. // "abc float[123]", "abc_123 bit[1234]", eetc.
  2070. // https://github.com/asg017/sqlite-vec/issues/46
  2071. int rc;
  2072. struct Vec0Scanner scanner;
  2073. struct Vec0Token token;
  2074. char *name;
  2075. int nameLength;
  2076. enum VectorElementType elementType;
  2077. enum Vec0DistanceMetrics distanceMetric = VEC0_DISTANCE_METRIC_L2;
  2078. int dimensions;
  2079. vec0_scanner_init(&scanner, source, source_length);
  2080. // starts with an identifier
  2081. rc = vec0_scanner_next(&scanner, &token);
  2082. if (rc != VEC0_TOKEN_RESULT_SOME &&
  2083. token.token_type != TOKEN_TYPE_IDENTIFIER) {
  2084. return SQLITE_EMPTY;
  2085. }
  2086. name = token.start;
  2087. nameLength = token.end - token.start;
  2088. // vector column type comes next: float, int, or bit
  2089. rc = vec0_scanner_next(&scanner, &token);
  2090. if (rc != VEC0_TOKEN_RESULT_SOME ||
  2091. token.token_type != TOKEN_TYPE_IDENTIFIER) {
  2092. return SQLITE_EMPTY;
  2093. }
  2094. if (sqlite3_strnicmp(token.start, "float", 5) == 0 ||
  2095. sqlite3_strnicmp(token.start, "f32", 3) == 0) {
  2096. elementType = SQLITE_VEC_ELEMENT_TYPE_FLOAT32;
  2097. } else if (sqlite3_strnicmp(token.start, "int8", 4) == 0 ||
  2098. sqlite3_strnicmp(token.start, "i8", 2) == 0) {
  2099. elementType = SQLITE_VEC_ELEMENT_TYPE_INT8;
  2100. } else if (sqlite3_strnicmp(token.start, "bit", 3) == 0) {
  2101. elementType = SQLITE_VEC_ELEMENT_TYPE_BIT;
  2102. } else {
  2103. return SQLITE_EMPTY;
  2104. }
  2105. // left '[' bracket
  2106. rc = vec0_scanner_next(&scanner, &token);
  2107. if (rc != VEC0_TOKEN_RESULT_SOME && token.token_type != TOKEN_TYPE_LBRACKET) {
  2108. return SQLITE_EMPTY;
  2109. }
  2110. // digit, for vector dimension length
  2111. rc = vec0_scanner_next(&scanner, &token);
  2112. if (rc != VEC0_TOKEN_RESULT_SOME && token.token_type != TOKEN_TYPE_DIGIT) {
  2113. return SQLITE_ERROR;
  2114. }
  2115. dimensions = atoi(token.start);
  2116. if (dimensions <= 0) {
  2117. return SQLITE_ERROR;
  2118. }
  2119. // // right ']' bracket
  2120. rc = vec0_scanner_next(&scanner, &token);
  2121. if (rc != VEC0_TOKEN_RESULT_SOME && token.token_type != TOKEN_TYPE_RBRACKET) {
  2122. return SQLITE_ERROR;
  2123. }
  2124. // any other tokens left should be column-level options , ex `key=value`
  2125. // ex `distance_metric=L2 distance_metric=cosine` should error
  2126. while (1) {
  2127. // should be EOF or identifier (option key)
  2128. rc = vec0_scanner_next(&scanner, &token);
  2129. if (rc == VEC0_TOKEN_RESULT_EOF) {
  2130. break;
  2131. }
  2132. if (rc != VEC0_TOKEN_RESULT_SOME &&
  2133. token.token_type != TOKEN_TYPE_IDENTIFIER) {
  2134. return SQLITE_ERROR;
  2135. }
  2136. char *key = token.start;
  2137. int keyLength = token.end - token.start;
  2138. if (sqlite3_strnicmp(key, "distance_metric", keyLength) == 0) {
  2139. if (elementType == SQLITE_VEC_ELEMENT_TYPE_BIT) {
  2140. return SQLITE_ERROR;
  2141. }
  2142. // ensure equal sign after distance_metric
  2143. rc = vec0_scanner_next(&scanner, &token);
  2144. if (rc != VEC0_TOKEN_RESULT_SOME && token.token_type != TOKEN_TYPE_EQ) {
  2145. return SQLITE_ERROR;
  2146. }
  2147. // distance_metric value, an identifier (L2, cosine, etc)
  2148. rc = vec0_scanner_next(&scanner, &token);
  2149. if (rc != VEC0_TOKEN_RESULT_SOME &&
  2150. token.token_type != TOKEN_TYPE_IDENTIFIER) {
  2151. return SQLITE_ERROR;
  2152. }
  2153. char *value = token.start;
  2154. int valueLength = token.end - token.start;
  2155. if (sqlite3_strnicmp(value, "l2", valueLength) == 0) {
  2156. distanceMetric = VEC0_DISTANCE_METRIC_L2;
  2157. } else if (sqlite3_strnicmp(value, "l1", valueLength) == 0) {
  2158. distanceMetric = VEC0_DISTANCE_METRIC_L1;
  2159. } else if (sqlite3_strnicmp(value, "cosine", valueLength) == 0) {
  2160. distanceMetric = VEC0_DISTANCE_METRIC_COSINE;
  2161. } else {
  2162. return SQLITE_ERROR;
  2163. }
  2164. }
  2165. // unknown key
  2166. else {
  2167. return SQLITE_ERROR;
  2168. }
  2169. }
  2170. outColumn->name = sqlite3_mprintf("%.*s", nameLength, name);
  2171. if (!outColumn->name) {
  2172. return SQLITE_ERROR;
  2173. }
  2174. outColumn->name_length = nameLength;
  2175. outColumn->distance_metric = distanceMetric;
  2176. outColumn->element_type = elementType;
  2177. outColumn->dimensions = dimensions;
  2178. return SQLITE_OK;
  2179. }
  2180. #pragma region vec_each table function
  2181. typedef struct vec_each_vtab vec_each_vtab;
  2182. struct vec_each_vtab {
  2183. sqlite3_vtab base;
  2184. };
  2185. typedef struct vec_each_cursor vec_each_cursor;
  2186. struct vec_each_cursor {
  2187. sqlite3_vtab_cursor base;
  2188. i64 iRowid;
  2189. enum VectorElementType vector_type;
  2190. void *vector;
  2191. size_t dimensions;
  2192. vector_cleanup cleanup;
  2193. };
  2194. static int vec_eachConnect(sqlite3 *db, void *pAux, int argc,
  2195. const char *const *argv, sqlite3_vtab **ppVtab,
  2196. char **pzErr) {
  2197. UNUSED_PARAMETER(pAux);
  2198. UNUSED_PARAMETER(argc);
  2199. UNUSED_PARAMETER(argv);
  2200. UNUSED_PARAMETER(pzErr);
  2201. vec_each_vtab *pNew;
  2202. int rc;
  2203. rc = sqlite3_declare_vtab(db, "CREATE TABLE x(value, vector hidden)");
  2204. #define VEC_EACH_COLUMN_VALUE 0
  2205. #define VEC_EACH_COLUMN_VECTOR 1
  2206. if (rc == SQLITE_OK) {
  2207. pNew = sqlite3_malloc(sizeof(*pNew));
  2208. *ppVtab = (sqlite3_vtab *)pNew;
  2209. if (pNew == 0)
  2210. return SQLITE_NOMEM;
  2211. memset(pNew, 0, sizeof(*pNew));
  2212. }
  2213. return rc;
  2214. }
  2215. static int vec_eachDisconnect(sqlite3_vtab *pVtab) {
  2216. vec_each_vtab *p = (vec_each_vtab *)pVtab;
  2217. sqlite3_free(p);
  2218. return SQLITE_OK;
  2219. }
  2220. static int vec_eachOpen(sqlite3_vtab *p, sqlite3_vtab_cursor **ppCursor) {
  2221. UNUSED_PARAMETER(p);
  2222. vec_each_cursor *pCur;
  2223. pCur = sqlite3_malloc(sizeof(*pCur));
  2224. if (pCur == 0)
  2225. return SQLITE_NOMEM;
  2226. memset(pCur, 0, sizeof(*pCur));
  2227. *ppCursor = &pCur->base;
  2228. return SQLITE_OK;
  2229. }
  2230. static int vec_eachClose(sqlite3_vtab_cursor *cur) {
  2231. vec_each_cursor *pCur = (vec_each_cursor *)cur;
  2232. if(pCur->vector) {
  2233. pCur->cleanup(pCur->vector);
  2234. }
  2235. sqlite3_free(pCur);
  2236. return SQLITE_OK;
  2237. }
  2238. static int vec_eachBestIndex(sqlite3_vtab *pVTab,
  2239. sqlite3_index_info *pIdxInfo) {
  2240. UNUSED_PARAMETER(pVTab);
  2241. int hasVector = 0;
  2242. for (int i = 0; i < pIdxInfo->nConstraint; i++) {
  2243. const struct sqlite3_index_constraint *pCons = &pIdxInfo->aConstraint[i];
  2244. // printf("i=%d iColumn=%d, op=%d, usable=%d\n", i, pCons->iColumn,
  2245. // pCons->op, pCons->usable);
  2246. switch (pCons->iColumn) {
  2247. case VEC_EACH_COLUMN_VECTOR: {
  2248. if (pCons->op == SQLITE_INDEX_CONSTRAINT_EQ && pCons->usable) {
  2249. hasVector = 1;
  2250. pIdxInfo->aConstraintUsage[i].argvIndex = 1;
  2251. pIdxInfo->aConstraintUsage[i].omit = 1;
  2252. }
  2253. break;
  2254. }
  2255. }
  2256. }
  2257. if (!hasVector) {
  2258. return SQLITE_CONSTRAINT;
  2259. }
  2260. pIdxInfo->estimatedCost = (double)100000;
  2261. pIdxInfo->estimatedRows = 100000;
  2262. return SQLITE_OK;
  2263. }
  2264. static int vec_eachFilter(sqlite3_vtab_cursor *pVtabCursor, int idxNum,
  2265. const char *idxStr, int argc, sqlite3_value **argv) {
  2266. UNUSED_PARAMETER(idxNum);
  2267. UNUSED_PARAMETER(idxStr);
  2268. assert(argc == 1);
  2269. vec_each_cursor *pCur = (vec_each_cursor *)pVtabCursor;
  2270. if (pCur->vector) {
  2271. pCur->cleanup(pCur->vector);
  2272. pCur->vector = NULL;
  2273. }
  2274. char *pzErrMsg;
  2275. int rc = vector_from_value(argv[0], &pCur->vector, &pCur->dimensions,
  2276. &pCur->vector_type, &pCur->cleanup, &pzErrMsg);
  2277. if (rc != SQLITE_OK) {
  2278. return SQLITE_ERROR;
  2279. }
  2280. pCur->iRowid = 0;
  2281. return SQLITE_OK;
  2282. }
  2283. static int vec_eachRowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid) {
  2284. vec_each_cursor *pCur = (vec_each_cursor *)cur;
  2285. *pRowid = pCur->iRowid;
  2286. return SQLITE_OK;
  2287. }
  2288. static int vec_eachEof(sqlite3_vtab_cursor *cur) {
  2289. vec_each_cursor *pCur = (vec_each_cursor *)cur;
  2290. return pCur->iRowid >= (i64)pCur->dimensions;
  2291. }
  2292. static int vec_eachNext(sqlite3_vtab_cursor *cur) {
  2293. vec_each_cursor *pCur = (vec_each_cursor *)cur;
  2294. pCur->iRowid++;
  2295. return SQLITE_OK;
  2296. }
  2297. static int vec_eachColumn(sqlite3_vtab_cursor *cur, sqlite3_context *context,
  2298. int i) {
  2299. vec_each_cursor *pCur = (vec_each_cursor *)cur;
  2300. switch (i) {
  2301. case VEC_EACH_COLUMN_VALUE:
  2302. switch (pCur->vector_type) {
  2303. case SQLITE_VEC_ELEMENT_TYPE_FLOAT32: {
  2304. sqlite3_result_double(context, ((f32 *)pCur->vector)[pCur->iRowid]);
  2305. break;
  2306. }
  2307. case SQLITE_VEC_ELEMENT_TYPE_BIT: {
  2308. u8 x = ((u8 *)pCur->vector)[pCur->iRowid / CHAR_BIT];
  2309. sqlite3_result_int(context,
  2310. (x & (0b10000000 >> ((pCur->iRowid % CHAR_BIT)))) > 0);
  2311. break;
  2312. }
  2313. case SQLITE_VEC_ELEMENT_TYPE_INT8: {
  2314. sqlite3_result_int(context, ((i8 *)pCur->vector)[pCur->iRowid]);
  2315. break;
  2316. }
  2317. }
  2318. break;
  2319. }
  2320. return SQLITE_OK;
  2321. }
  2322. static sqlite3_module vec_eachModule = {
  2323. /* iVersion */ 0,
  2324. /* xCreate */ 0,
  2325. /* xConnect */ vec_eachConnect,
  2326. /* xBestIndex */ vec_eachBestIndex,
  2327. /* xDisconnect */ vec_eachDisconnect,
  2328. /* xDestroy */ 0,
  2329. /* xOpen */ vec_eachOpen,
  2330. /* xClose */ vec_eachClose,
  2331. /* xFilter */ vec_eachFilter,
  2332. /* xNext */ vec_eachNext,
  2333. /* xEof */ vec_eachEof,
  2334. /* xColumn */ vec_eachColumn,
  2335. /* xRowid */ vec_eachRowid,
  2336. /* xUpdate */ 0,
  2337. /* xBegin */ 0,
  2338. /* xSync */ 0,
  2339. /* xCommit */ 0,
  2340. /* xRollback */ 0,
  2341. /* xFindMethod */ 0,
  2342. /* xRename */ 0,
  2343. /* xSavepoint */ 0,
  2344. /* xRelease */ 0,
  2345. /* xRollbackTo */ 0,
  2346. /* xShadowName */ 0,
  2347. #if SQLITE_VERSION_NUMBER >= 3044000
  2348. /* xIntegrity */ 0
  2349. #endif
  2350. };
  2351. #pragma endregion
  2352. #pragma region vec_npy_each table function
  /**
   * Token kinds recognized while parsing the header of a numpy array.
   */
  enum NpyTokenType {
    /* not produced by npy_token_next() */
    NPY_TOKEN_TYPE_IDENTIFIER,
    /* a run of decimal digits */
    NPY_TOKEN_TYPE_NUMBER,
    /* '(' */
    NPY_TOKEN_TYPE_LPAREN,
    /* ')' */
    NPY_TOKEN_TYPE_RPAREN,
    /* '{' */
    NPY_TOKEN_TYPE_LBRACE,
    /* '}' */
    NPY_TOKEN_TYPE_RBRACE,
    /* ':' */
    NPY_TOKEN_TYPE_COLON,
    /* ',' */
    NPY_TOKEN_TYPE_COMMA,
    /* a single-quoted string, quotes included */
    NPY_TOKEN_TYPE_STRING,
    /* the literal `False` */
    NPY_TOKEN_TYPE_FALSE,
  };
  2365. struct NpyToken {
  2366. enum NpyTokenType token_type;
  2367. unsigned char *start;
  2368. unsigned char *end;
  2369. };
  2370. int npy_token_next(unsigned char *start, unsigned char *end,
  2371. struct NpyToken *out) {
  2372. unsigned char *ptr = start;
  2373. while (ptr < end) {
  2374. unsigned char curr = *ptr;
  2375. if (is_whitespace(curr)) {
  2376. ptr++;
  2377. continue;
  2378. } else if (curr == '(') {
  2379. out->start = ptr++;
  2380. out->end = ptr;
  2381. out->token_type = NPY_TOKEN_TYPE_LPAREN;
  2382. return VEC0_TOKEN_RESULT_SOME;
  2383. } else if (curr == ')') {
  2384. out->start = ptr++;
  2385. out->end = ptr;
  2386. out->token_type = NPY_TOKEN_TYPE_RPAREN;
  2387. return VEC0_TOKEN_RESULT_SOME;
  2388. } else if (curr == '{') {
  2389. out->start = ptr++;
  2390. out->end = ptr;
  2391. out->token_type = NPY_TOKEN_TYPE_LBRACE;
  2392. return VEC0_TOKEN_RESULT_SOME;
  2393. } else if (curr == '}') {
  2394. out->start = ptr++;
  2395. out->end = ptr;
  2396. out->token_type = NPY_TOKEN_TYPE_RBRACE;
  2397. return VEC0_TOKEN_RESULT_SOME;
  2398. } else if (curr == ':') {
  2399. out->start = ptr++;
  2400. out->end = ptr;
  2401. out->token_type = NPY_TOKEN_TYPE_COLON;
  2402. return VEC0_TOKEN_RESULT_SOME;
  2403. } else if (curr == ',') {
  2404. out->start = ptr++;
  2405. out->end = ptr;
  2406. out->token_type = NPY_TOKEN_TYPE_COMMA;
  2407. return VEC0_TOKEN_RESULT_SOME;
  2408. } else if (curr == '\'') {
  2409. unsigned char *start = ptr;
  2410. ptr++;
  2411. while (ptr < end) {
  2412. if ((*ptr) == '\'') {
  2413. break;
  2414. }
  2415. ptr++;
  2416. }
  2417. if ((*ptr) != '\'') {
  2418. return VEC0_TOKEN_RESULT_ERROR;
  2419. }
  2420. out->start = start;
  2421. out->end = ++ptr;
  2422. out->token_type = NPY_TOKEN_TYPE_STRING;
  2423. return VEC0_TOKEN_RESULT_SOME;
  2424. } else if (curr == 'F' &&
  2425. strncmp((char *)ptr, "False", strlen("False")) == 0) {
  2426. out->start = ptr;
  2427. out->end = (ptr + (int)strlen("False"));
  2428. ptr = out->end;
  2429. out->token_type = NPY_TOKEN_TYPE_FALSE;
  2430. return VEC0_TOKEN_RESULT_SOME;
  2431. } else if (is_digit(curr)) {
  2432. unsigned char *start = ptr;
  2433. while (ptr < end && (is_digit(*ptr))) {
  2434. ptr++;
  2435. }
  2436. out->start = start;
  2437. out->end = ptr;
  2438. out->token_type = NPY_TOKEN_TYPE_NUMBER;
  2439. return VEC0_TOKEN_RESULT_SOME;
  2440. } else {
  2441. return VEC0_TOKEN_RESULT_ERROR;
  2442. }
  2443. }
  2444. return VEC0_TOKEN_RESULT_ERROR;
  2445. }
  /**
   * Cursor state used to tokenize the header of a numpy array.
   */
  struct NpyScanner {
    /* Position npy_scanner_next() resumes tokenizing from. */
    unsigned char *start;
    /* One past the last byte of the header. */
    unsigned char *end;
    /* Set to the beginning of the header by npy_scanner_init(). */
    unsigned char *ptr;
  };
  2451. void npy_scanner_init(struct NpyScanner *scanner, const unsigned char *source,
  2452. int source_length) {
  2453. scanner->start = (unsigned char *)source;
  2454. scanner->end = (unsigned char *)source + source_length;
  2455. scanner->ptr = (unsigned char *)source;
  2456. }
  2457. int npy_scanner_next(struct NpyScanner *scanner, struct NpyToken *out) {
  2458. int rc = npy_token_next(scanner->start, scanner->end, out);
  2459. if (rc == VEC0_TOKEN_RESULT_SOME) {
  2460. scanner->start = out->end;
  2461. }
  2462. return rc;
  2463. }
  2464. #define NPY_PARSE_ERROR "Error parsing numpy array: "
  2465. int parse_npy_header(sqlite3_vtab *pVTab, const unsigned char *header,
  2466. size_t headerLength,
  2467. enum VectorElementType *out_element_type,
  2468. int *fortran_order, size_t *numElements,
  2469. size_t *numDimensions) {
  2470. struct NpyScanner scanner;
  2471. struct NpyToken token;
  2472. int rc;
  2473. npy_scanner_init(&scanner, header, headerLength);
  2474. if (npy_scanner_next(&scanner, &token) != VEC0_TOKEN_RESULT_SOME &&
  2475. token.token_type != NPY_TOKEN_TYPE_LBRACE) {
  2476. vtab_set_error(pVTab,
  2477. NPY_PARSE_ERROR "numpy header did not start with '{'");
  2478. return SQLITE_ERROR;
  2479. }
  2480. while (1) {
  2481. rc = npy_scanner_next(&scanner, &token);
  2482. if (rc != VEC0_TOKEN_RESULT_SOME) {
  2483. vtab_set_error(pVTab, NPY_PARSE_ERROR "expected key in numpy header");
  2484. return SQLITE_ERROR;
  2485. }
  2486. if (token.token_type == NPY_TOKEN_TYPE_RBRACE) {
  2487. break;
  2488. }
  2489. if (token.token_type != NPY_TOKEN_TYPE_STRING) {
  2490. vtab_set_error(pVTab, NPY_PARSE_ERROR
  2491. "expected a string as key in numpy header");
  2492. return SQLITE_ERROR;
  2493. }
  2494. unsigned char *key = token.start;
  2495. rc = npy_scanner_next(&scanner, &token);
  2496. if ((rc != VEC0_TOKEN_RESULT_SOME) ||
  2497. (token.token_type != NPY_TOKEN_TYPE_COLON)) {
  2498. vtab_set_error(pVTab, NPY_PARSE_ERROR
  2499. "expected a ':' after key in numpy header");
  2500. return SQLITE_ERROR;
  2501. }
  2502. if (strncmp((char *)key, "'descr'", strlen("'descr'")) == 0) {
  2503. rc = npy_scanner_next(&scanner, &token);
  2504. if ((rc != VEC0_TOKEN_RESULT_SOME) ||
  2505. (token.token_type != NPY_TOKEN_TYPE_STRING)) {
  2506. vtab_set_error(pVTab, NPY_PARSE_ERROR
  2507. "expected a string value after 'descr' key");
  2508. return SQLITE_ERROR;
  2509. }
  2510. if (strncmp((char *)token.start, "'<f4'", strlen("'<f4'")) != 0) {
  2511. vtab_set_error(
  2512. pVTab, NPY_PARSE_ERROR
  2513. "Only '<f4' values are supported in sqlite-vec numpy functions");
  2514. return SQLITE_ERROR;
  2515. }
  2516. *out_element_type = SQLITE_VEC_ELEMENT_TYPE_FLOAT32;
  2517. } else if (strncmp((char *)key, "'fortran_order'",
  2518. strlen("'fortran_order'")) == 0) {
  2519. rc = npy_scanner_next(&scanner, &token);
  2520. if (rc != VEC0_TOKEN_RESULT_SOME ||
  2521. token.token_type != NPY_TOKEN_TYPE_FALSE) {
  2522. vtab_set_error(pVTab, NPY_PARSE_ERROR
  2523. "Only fortran_order = False is supported in sqlite-vec "
  2524. "numpy functions");
  2525. return SQLITE_ERROR;
  2526. }
  2527. *fortran_order = 0;
  2528. } else if (strncmp((char *)key, "'shape'", strlen("'shape'")) == 0) {
  2529. // "(xxx, xxx)" OR (xxx,)
  2530. size_t first;
  2531. rc = npy_scanner_next(&scanner, &token);
  2532. if ((rc != VEC0_TOKEN_RESULT_SOME) ||
  2533. (token.token_type != NPY_TOKEN_TYPE_LPAREN)) {
  2534. vtab_set_error(pVTab, NPY_PARSE_ERROR
  2535. "Expected left parenthesis '(' after shape key");
  2536. return SQLITE_ERROR;
  2537. }
  2538. rc = npy_scanner_next(&scanner, &token);
  2539. if ((rc != VEC0_TOKEN_RESULT_SOME) ||
  2540. (token.token_type != NPY_TOKEN_TYPE_NUMBER)) {
  2541. vtab_set_error(pVTab, NPY_PARSE_ERROR
  2542. "Expected an initial number in shape value");
  2543. return SQLITE_ERROR;
  2544. }
  2545. first = strtol((char *)token.start, NULL, 10);
  2546. rc = npy_scanner_next(&scanner, &token);
  2547. if ((rc != VEC0_TOKEN_RESULT_SOME) ||
  2548. (token.token_type != NPY_TOKEN_TYPE_COMMA)) {
  2549. vtab_set_error(pVTab, NPY_PARSE_ERROR
  2550. "Expected comma after first shape value");
  2551. return SQLITE_ERROR;
  2552. }
  2553. rc = npy_scanner_next(&scanner, &token);
  2554. if (rc != VEC0_TOKEN_RESULT_SOME) {
  2555. vtab_set_error(pVTab, NPY_PARSE_ERROR
  2556. "unexpected header EOF while parsing shape");
  2557. return SQLITE_ERROR;
  2558. }
  2559. if (token.token_type == NPY_TOKEN_TYPE_NUMBER) {
  2560. *numElements = first;
  2561. *numDimensions = strtol((char *)token.start, NULL, 10);
  2562. rc = npy_scanner_next(&scanner, &token);
  2563. if ((rc != VEC0_TOKEN_RESULT_SOME) ||
  2564. (token.token_type != NPY_TOKEN_TYPE_RPAREN)) {
  2565. vtab_set_error(pVTab, NPY_PARSE_ERROR
  2566. "expected right parenthesis after shape value");
  2567. return SQLITE_ERROR;
  2568. }
  2569. } else if (token.token_type == NPY_TOKEN_TYPE_RPAREN) {
  2570. // '(0,)' means an empty array!
  2571. *numElements = first ? 1 : 0;
  2572. *numDimensions = first;
  2573. } else {
  2574. vtab_set_error(pVTab, NPY_PARSE_ERROR "unknown type in shape value");
  2575. return SQLITE_ERROR;
  2576. }
  2577. } else {
  2578. vtab_set_error(pVTab, NPY_PARSE_ERROR "unknown key in numpy header");
  2579. return SQLITE_ERROR;
  2580. }
  2581. rc = npy_scanner_next(&scanner, &token);
  2582. if ((rc != VEC0_TOKEN_RESULT_SOME) ||
  2583. (token.token_type != NPY_TOKEN_TYPE_COMMA)) {
  2584. vtab_set_error(pVTab, NPY_PARSE_ERROR "unknown extra token after value");
  2585. return SQLITE_ERROR;
  2586. }
  2587. }
  2588. return SQLITE_OK;
  2589. }
  2590. typedef struct vec_npy_each_vtab vec_npy_each_vtab;
  2591. struct vec_npy_each_vtab {
  2592. sqlite3_vtab base;
  2593. };
  /** Where a vec_npy_each cursor reads its numpy array from. */
  typedef enum {
    // The array is an in-memory blob. This is value 0, so a zero-filled
    // cursor starts out in this mode.
    VEC_NPY_EACH_INPUT_BUFFER = 0,
    // The array is streamed in chunks from an opened .npy file.
    VEC_NPY_EACH_INPUT_FILE = 1,
  } vec_npy_each_input_type;
  2598. typedef struct vec_npy_each_cursor vec_npy_each_cursor;
  2599. struct vec_npy_each_cursor {
  2600. sqlite3_vtab_cursor base;
  2601. i64 iRowid;
  2602. // sqlite-vec compatible type of vector
  2603. enum VectorElementType elementType;
  2604. // number of vectors in the npy array
  2605. size_t nElements;
  2606. // number of dimensions each vector has
  2607. size_t nDimensions;
  2608. vec_npy_each_input_type input_type;
  2609. // when input_type == VEC_NPY_EACH_INPUT_BUFFER
  2610. // Buffer containing the vector data, when reading from an in-memory buffer.
  2611. // Size: nElements * nDimensions * element_size
  2612. // Clean up with sqlite3_free() once complete
  2613. void *vector;
  2614. // when input_type == VEC_NPY_EACH_INPUT_FILE
  2615. // Opened npy file, when reading from a file.
  2616. // fclose() when complete.
  2617. #ifndef SQLITE_VEC_OMIT_FS
  2618. FILE *file;
  2619. #endif
  2620. // an in-memory buffer containing a portion of the npy array.
  2621. // Used for faster reading, instead of calling fread a lot.
  2622. // Will have a byte-size of fileBufferSize
  2623. void *chunksBuffer;
  2624. // size of allocated fileBuffer in bytes
  2625. size_t chunksBufferSize;
  2626. //// Maximum length of the buffer, in terms of number of vectors.
  2627. size_t maxChunks;
  2628. // Counter index of the current vector into of fileBuffer to yield.
  2629. // Starts at 0 once fileBuffer is read, and iterates to bufferLength.
  2630. // Resets to 0 once that "buffer" is yielded and a new one is read.
  2631. size_t currentChunkIndex;
  2632. size_t currentChunkSize;
  2633. // 0 when there are still more elements to read/yield, 1 when complete.
  2634. int eof;
  2635. };
  // The six bytes every .npy payload starts with: 0x93 followed by "NUMPY".
  // Spelled out byte by byte because the array has no room for a terminator.
  static unsigned char NPY_MAGIC[6] = {0x93, 'N', 'U', 'M', 'P', 'Y'};
  2637. #ifndef SQLITE_VEC_OMIT_FS
  2638. int parse_npy_file(sqlite3_vtab *pVTab, FILE *file, vec_npy_each_cursor *pCur) {
  2639. int n;
  2640. fseek(file, 0, SEEK_END);
  2641. long fileSize = ftell(file);
  2642. fseek(file, 0L, SEEK_SET);
  2643. unsigned char header[10];
  2644. n = fread(&header, sizeof(unsigned char), 10, file);
  2645. if (n != 10) {
  2646. vtab_set_error(pVTab, "numpy array file too short");
  2647. return SQLITE_ERROR;
  2648. }
  2649. if (memcmp(NPY_MAGIC, header, sizeof(NPY_MAGIC)) != 0) {
  2650. vtab_set_error(pVTab,
  2651. "numpy array file does not contain the 'magic' header");
  2652. return SQLITE_ERROR;
  2653. }
  2654. u8 major = header[6];
  2655. u8 minor = header[7];
  2656. uint16_t headerLength = 0;
  2657. memcpy(&headerLength, &header[8], sizeof(uint16_t));
  2658. size_t totalHeaderLength = sizeof(NPY_MAGIC) + sizeof(major) + sizeof(minor) +
  2659. sizeof(headerLength) + headerLength;
  2660. i32 dataSize = fileSize - totalHeaderLength;
  2661. if (dataSize < 0) {
  2662. vtab_set_error(pVTab, "numpy array file header length is invalid");
  2663. return SQLITE_ERROR;
  2664. }
  2665. unsigned char *headerX = sqlite3_malloc(headerLength);
  2666. if (headerLength && !headerX) {
  2667. return SQLITE_NOMEM;
  2668. }
  2669. n = fread(headerX, sizeof(char), headerLength, file);
  2670. if (n != headerLength) {
  2671. sqlite3_free(headerX);
  2672. vtab_set_error(pVTab, "numpy array file header length is invalid");
  2673. return SQLITE_ERROR;
  2674. }
  2675. int fortran_order;
  2676. enum VectorElementType element_type;
  2677. size_t numElements;
  2678. size_t numDimensions;
  2679. int rc = parse_npy_header(pVTab, headerX, headerLength, &element_type,
  2680. &fortran_order, &numElements, &numDimensions);
  2681. sqlite3_free(headerX);
  2682. if (rc != SQLITE_OK) {
  2683. // parse_npy_header already attackes an error emssage
  2684. return rc;
  2685. }
  2686. i32 expectedDataSize =
  2687. numElements * vector_byte_size(element_type, numDimensions);
  2688. if (expectedDataSize != dataSize) {
  2689. vtab_set_error(
  2690. pVTab, "numpy array file error: Expected a data size of %d, found %d",
  2691. expectedDataSize, dataSize);
  2692. return SQLITE_ERROR;
  2693. }
  2694. pCur->maxChunks = 1024;
  2695. pCur->chunksBufferSize =
  2696. (vector_byte_size(element_type, numDimensions)) * pCur->maxChunks;
  2697. pCur->chunksBuffer = sqlite3_malloc(pCur->chunksBufferSize);
  2698. if (pCur->chunksBufferSize && !pCur->chunksBuffer) {
  2699. return SQLITE_NOMEM;
  2700. }
  2701. pCur->currentChunkSize =
  2702. fread(pCur->chunksBuffer, vector_byte_size(element_type, numDimensions),
  2703. pCur->maxChunks, file);
  2704. pCur->currentChunkIndex = 0;
  2705. pCur->elementType = element_type;
  2706. pCur->nElements = numElements;
  2707. pCur->nDimensions = numDimensions;
  2708. pCur->input_type = VEC_NPY_EACH_INPUT_FILE;
  2709. pCur->eof = pCur->currentChunkSize == 0;
  2710. pCur->file = file;
  2711. return SQLITE_OK;
  2712. }
  2713. #endif
  2714. int parse_npy_buffer(sqlite3_vtab *pVTab, const unsigned char *buffer,
  2715. int bufferLength, void **data, size_t *numElements,
  2716. size_t *numDimensions,
  2717. enum VectorElementType *element_type) {
  2718. if (bufferLength < 10) {
  2719. // IMP: V03312_20150
  2720. vtab_set_error(pVTab, "numpy array too short");
  2721. return SQLITE_ERROR;
  2722. }
  2723. if (memcmp(NPY_MAGIC, buffer, sizeof(NPY_MAGIC)) != 0) {
  2724. // V11954_28792
  2725. vtab_set_error(pVTab, "numpy array does not contain the 'magic' header");
  2726. return SQLITE_ERROR;
  2727. }
  2728. u8 major = buffer[6];
  2729. u8 minor = buffer[7];
  2730. uint16_t headerLength = 0;
  2731. memcpy(&headerLength, &buffer[8], sizeof(uint16_t));
  2732. i32 totalHeaderLength = sizeof(NPY_MAGIC) + sizeof(major) + sizeof(minor) +
  2733. sizeof(headerLength) + headerLength;
  2734. i32 dataSize = bufferLength - totalHeaderLength;
  2735. if (dataSize < 0) {
  2736. vtab_set_error(pVTab, "numpy array header length is invalid");
  2737. return SQLITE_ERROR;
  2738. }
  2739. const unsigned char *header = &buffer[10];
  2740. int fortran_order;
  2741. int rc = parse_npy_header(pVTab, header, headerLength, element_type,
  2742. &fortran_order, numElements, numDimensions);
  2743. if (rc != SQLITE_OK) {
  2744. return rc;
  2745. }
  2746. i32 expectedDataSize =
  2747. (*numElements * vector_byte_size(*element_type, *numDimensions));
  2748. if (expectedDataSize != dataSize) {
  2749. vtab_set_error(pVTab,
  2750. "numpy array error: Expected a data size of %d, found %d",
  2751. expectedDataSize, dataSize);
  2752. return SQLITE_ERROR;
  2753. }
  2754. *data = (void *)&buffer[totalHeaderLength];
  2755. return SQLITE_OK;
  2756. }
  2757. static int vec_npy_eachConnect(sqlite3 *db, void *pAux, int argc,
  2758. const char *const *argv, sqlite3_vtab **ppVtab,
  2759. char **pzErr) {
  2760. UNUSED_PARAMETER(pAux);
  2761. UNUSED_PARAMETER(argc);
  2762. UNUSED_PARAMETER(argv);
  2763. UNUSED_PARAMETER(pzErr);
  2764. vec_npy_each_vtab *pNew;
  2765. int rc;
  2766. rc = sqlite3_declare_vtab(db, "CREATE TABLE x(vector, input hidden)");
  2767. #define VEC_NPY_EACH_COLUMN_VECTOR 0
  2768. #define VEC_NPY_EACH_COLUMN_INPUT 1
  2769. if (rc == SQLITE_OK) {
  2770. pNew = sqlite3_malloc(sizeof(*pNew));
  2771. *ppVtab = (sqlite3_vtab *)pNew;
  2772. if (pNew == 0)
  2773. return SQLITE_NOMEM;
  2774. memset(pNew, 0, sizeof(*pNew));
  2775. }
  2776. return rc;
  2777. }
  2778. static int vec_npy_eachDisconnect(sqlite3_vtab *pVtab) {
  2779. vec_npy_each_vtab *p = (vec_npy_each_vtab *)pVtab;
  2780. sqlite3_free(p);
  2781. return SQLITE_OK;
  2782. }
  2783. static int vec_npy_eachOpen(sqlite3_vtab *p, sqlite3_vtab_cursor **ppCursor) {
  2784. UNUSED_PARAMETER(p);
  2785. vec_npy_each_cursor *pCur;
  2786. pCur = sqlite3_malloc(sizeof(*pCur));
  2787. if (pCur == 0)
  2788. return SQLITE_NOMEM;
  2789. memset(pCur, 0, sizeof(*pCur));
  2790. *ppCursor = &pCur->base;
  2791. return SQLITE_OK;
  2792. }
  2793. static int vec_npy_eachClose(sqlite3_vtab_cursor *cur) {
  2794. vec_npy_each_cursor *pCur = (vec_npy_each_cursor *)cur;
  2795. #ifndef SQLITE_VEC_OMIT_FS
  2796. if (pCur->file) {
  2797. fclose(pCur->file);
  2798. pCur->file = NULL;
  2799. }
  2800. #endif
  2801. if (pCur->chunksBuffer) {
  2802. sqlite3_free(pCur->chunksBuffer);
  2803. pCur->chunksBuffer = NULL;
  2804. }
  2805. if (pCur->vector) {
  2806. pCur->vector = NULL;
  2807. }
  2808. sqlite3_free(pCur);
  2809. return SQLITE_OK;
  2810. }
  2811. static int vec_npy_eachBestIndex(sqlite3_vtab *pVTab,
  2812. sqlite3_index_info *pIdxInfo) {
  2813. int hasInput;
  2814. for (int i = 0; i < pIdxInfo->nConstraint; i++) {
  2815. const struct sqlite3_index_constraint *pCons = &pIdxInfo->aConstraint[i];
  2816. // printf("i=%d iColumn=%d, op=%d, usable=%d\n", i, pCons->iColumn,
  2817. // pCons->op, pCons->usable);
  2818. switch (pCons->iColumn) {
  2819. case VEC_NPY_EACH_COLUMN_INPUT: {
  2820. if (pCons->op == SQLITE_INDEX_CONSTRAINT_EQ && pCons->usable) {
  2821. hasInput = 1;
  2822. pIdxInfo->aConstraintUsage[i].argvIndex = 1;
  2823. pIdxInfo->aConstraintUsage[i].omit = 1;
  2824. }
  2825. break;
  2826. }
  2827. }
  2828. }
  2829. if (!hasInput) {
  2830. pVTab->zErrMsg = sqlite3_mprintf("input argument is required");
  2831. return SQLITE_ERROR;
  2832. }
  2833. pIdxInfo->estimatedCost = (double)100000;
  2834. pIdxInfo->estimatedRows = 100000;
  2835. return SQLITE_OK;
  2836. }
  2837. static int vec_npy_eachFilter(sqlite3_vtab_cursor *pVtabCursor, int idxNum,
  2838. const char *idxStr, int argc,
  2839. sqlite3_value **argv) {
  2840. UNUSED_PARAMETER(idxNum);
  2841. UNUSED_PARAMETER(idxStr);
  2842. assert(argc == 1);
  2843. int rc;
  2844. vec_npy_each_cursor *pCur = (vec_npy_each_cursor *)pVtabCursor;
  2845. #ifndef SQLITE_VEC_OMIT_FS
  2846. if (pCur->file) {
  2847. fclose(pCur->file);
  2848. pCur->file = NULL;
  2849. }
  2850. #endif
  2851. if (pCur->chunksBuffer) {
  2852. sqlite3_free(pCur->chunksBuffer);
  2853. pCur->chunksBuffer = NULL;
  2854. }
  2855. if (pCur->vector) {
  2856. pCur->vector = NULL;
  2857. }
  2858. #ifndef SQLITE_VEC_OMIT_FS
  2859. struct VecNpyFile *f = NULL;
  2860. if ((f = sqlite3_value_pointer(argv[0], SQLITE_VEC_NPY_FILE_NAME))) {
  2861. FILE *file = fopen(f->path, "r");
  2862. if (!file) {
  2863. vtab_set_error(pVtabCursor->pVtab, "Could not open numpy file");
  2864. return SQLITE_ERROR;
  2865. }
  2866. rc = parse_npy_file(pVtabCursor->pVtab, file, pCur);
  2867. if (rc != SQLITE_OK) {
  2868. #ifndef SQLITE_VEC_OMIT_FS
  2869. fclose(file);
  2870. #endif
  2871. return rc;
  2872. }
  2873. } else
  2874. #endif
  2875. {
  2876. const unsigned char *input = sqlite3_value_blob(argv[0]);
  2877. int inputLength = sqlite3_value_bytes(argv[0]);
  2878. void *data;
  2879. size_t numElements;
  2880. size_t numDimensions;
  2881. enum VectorElementType element_type;
  2882. rc = parse_npy_buffer(pVtabCursor->pVtab, input, inputLength, &data,
  2883. &numElements, &numDimensions, &element_type);
  2884. if (rc != SQLITE_OK) {
  2885. return rc;
  2886. }
  2887. pCur->vector = data;
  2888. pCur->elementType = element_type;
  2889. pCur->nElements = numElements;
  2890. pCur->nDimensions = numDimensions;
  2891. pCur->input_type = VEC_NPY_EACH_INPUT_BUFFER;
  2892. }
  2893. pCur->iRowid = 0;
  2894. return SQLITE_OK;
  2895. }
  2896. static int vec_npy_eachRowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid) {
  2897. vec_npy_each_cursor *pCur = (vec_npy_each_cursor *)cur;
  2898. *pRowid = pCur->iRowid;
  2899. return SQLITE_OK;
  2900. }
  2901. static int vec_npy_eachEof(sqlite3_vtab_cursor *cur) {
  2902. vec_npy_each_cursor *pCur = (vec_npy_each_cursor *)cur;
  2903. if (pCur->input_type == VEC_NPY_EACH_INPUT_BUFFER) {
  2904. return (!pCur->nElements) || (size_t)pCur->iRowid >= pCur->nElements;
  2905. }
  2906. return pCur->eof;
  2907. }
  2908. static int vec_npy_eachNext(sqlite3_vtab_cursor *cur) {
  2909. vec_npy_each_cursor *pCur = (vec_npy_each_cursor *)cur;
  2910. pCur->iRowid++;
  2911. if (pCur->input_type == VEC_NPY_EACH_INPUT_BUFFER) {
  2912. return SQLITE_OK;
  2913. }
  2914. #ifndef SQLITE_VEC_OMIT_FS
  2915. // else: input is a file
  2916. pCur->currentChunkIndex++;
  2917. if (pCur->currentChunkIndex >= pCur->currentChunkSize) {
  2918. pCur->currentChunkSize =
  2919. fread(pCur->chunksBuffer,
  2920. vector_byte_size(pCur->elementType, pCur->nDimensions),
  2921. pCur->maxChunks, pCur->file);
  2922. if (!pCur->currentChunkSize) {
  2923. pCur->eof = 1;
  2924. }
  2925. pCur->currentChunkIndex = 0;
  2926. }
  2927. #endif
  2928. return SQLITE_OK;
  2929. }
  2930. static int vec_npy_eachColumnBuffer(vec_npy_each_cursor *pCur,
  2931. sqlite3_context *context, int i) {
  2932. switch (i) {
  2933. case VEC_NPY_EACH_COLUMN_VECTOR: {
  2934. sqlite3_result_subtype(context, pCur->elementType);
  2935. switch (pCur->elementType) {
  2936. case SQLITE_VEC_ELEMENT_TYPE_FLOAT32: {
  2937. sqlite3_result_blob(
  2938. context,
  2939. &((unsigned char *)
  2940. pCur->vector)[pCur->iRowid * pCur->nDimensions * sizeof(f32)],
  2941. pCur->nDimensions * sizeof(f32), SQLITE_TRANSIENT);
  2942. break;
  2943. }
  2944. case SQLITE_VEC_ELEMENT_TYPE_INT8:
  2945. case SQLITE_VEC_ELEMENT_TYPE_BIT: {
  2946. // https://github.com/asg017/sqlite-vec/issues/42
  2947. sqlite3_result_error(context,
  2948. "vec_npy_each only supports float32 vectors", -1);
  2949. break;
  2950. }
  2951. }
  2952. break;
  2953. }
  2954. }
  2955. return SQLITE_OK;
  2956. }
  2957. static int vec_npy_eachColumnFile(vec_npy_each_cursor *pCur,
  2958. sqlite3_context *context, int i) {
  2959. switch (i) {
  2960. case VEC_NPY_EACH_COLUMN_VECTOR: {
  2961. switch (pCur->elementType) {
  2962. case SQLITE_VEC_ELEMENT_TYPE_FLOAT32: {
  2963. sqlite3_result_blob(
  2964. context,
  2965. &((unsigned char *)
  2966. pCur->chunksBuffer)[pCur->currentChunkIndex *
  2967. pCur->nDimensions * sizeof(f32)],
  2968. pCur->nDimensions * sizeof(f32), SQLITE_TRANSIENT);
  2969. break;
  2970. }
  2971. case SQLITE_VEC_ELEMENT_TYPE_INT8:
  2972. case SQLITE_VEC_ELEMENT_TYPE_BIT: {
  2973. // https://github.com/asg017/sqlite-vec/issues/42
  2974. sqlite3_result_error(context,
  2975. "vec_npy_each only supports float32 vectors", -1);
  2976. break;
  2977. }
  2978. }
  2979. break;
  2980. }
  2981. }
  2982. return SQLITE_OK;
  2983. }
  2984. static int vec_npy_eachColumn(sqlite3_vtab_cursor *cur,
  2985. sqlite3_context *context, int i) {
  2986. vec_npy_each_cursor *pCur = (vec_npy_each_cursor *)cur;
  2987. switch (pCur->input_type) {
  2988. case VEC_NPY_EACH_INPUT_BUFFER:
  2989. return vec_npy_eachColumnBuffer(pCur, context, i);
  2990. case VEC_NPY_EACH_INPUT_FILE:
  2991. return vec_npy_eachColumnFile(pCur, context, i);
  2992. }
  2993. return SQLITE_ERROR;
  2994. }
  2995. static sqlite3_module vec_npy_eachModule = {
  2996. /* iVersion */ 0,
  2997. /* xCreate */ 0,
  2998. /* xConnect */ vec_npy_eachConnect,
  2999. /* xBestIndex */ vec_npy_eachBestIndex,
  3000. /* xDisconnect */ vec_npy_eachDisconnect,
  3001. /* xDestroy */ 0,
  3002. /* xOpen */ vec_npy_eachOpen,
  3003. /* xClose */ vec_npy_eachClose,
  3004. /* xFilter */ vec_npy_eachFilter,
  3005. /* xNext */ vec_npy_eachNext,
  3006. /* xEof */ vec_npy_eachEof,
  3007. /* xColumn */ vec_npy_eachColumn,
  3008. /* xRowid */ vec_npy_eachRowid,
  3009. /* xUpdate */ 0,
  3010. /* xBegin */ 0,
  3011. /* xSync */ 0,
  3012. /* xCommit */ 0,
  3013. /* xRollback */ 0,
  3014. /* xFindMethod */ 0,
  3015. /* xRename */ 0,
  3016. /* xSavepoint */ 0,
  3017. /* xRelease */ 0,
  3018. /* xRollbackTo */ 0,
  3019. /* xShadowName */ 0,
  3020. #if SQLITE_VERSION_NUMBER >= 3044000
  3021. /* xIntegrity */ 0,
  3022. #endif
  3023. };
  3024. #pragma endregion
  3025. #pragma region vec0 virtual table
  // ---- vec0 column layout -------------------------------------------------

  // Column index of the id column.
  #define VEC0_COLUMN_ID 0
  // Column index of the first user-defined column.
  #define VEC0_COLUMN_USERN_START 1
  // Offsets, counted from the last user-defined column, of the hidden
  // "distance" and "k" columns.
  #define VEC0_COLUMN_OFFSET_DISTANCE 1
  #define VEC0_COLUMN_OFFSET_K 2

  // ---- shadow table names and schemas ------------------------------------
  // Each name is a format string: 1) schema, 2) original vtab table name.
  // The "_N" variants take a third argument, a two-digit column index.

  #define VEC0_SHADOW_INFO_NAME "\"%w\".\"%w_info\""

  #define VEC0_SHADOW_CHUNKS_NAME "\"%w\".\"%w_chunks\""
  #define VEC0_SHADOW_CHUNKS_CREATE                                            \
    "CREATE TABLE " VEC0_SHADOW_CHUNKS_NAME "("                                \
    "chunk_id INTEGER PRIMARY KEY AUTOINCREMENT,"                              \
    "size INTEGER NOT NULL,"                                                   \
    "validity BLOB NOT NULL,"                                                  \
    "rowids BLOB NOT NULL"                                                     \
    ");"

  #define VEC0_SHADOW_ROWIDS_NAME "\"%w\".\"%w_rowids\""
  #define VEC0_SHADOW_ROWIDS_CREATE_BASIC                                      \
    "CREATE TABLE " VEC0_SHADOW_ROWIDS_NAME "("                                \
    "rowid INTEGER PRIMARY KEY AUTOINCREMENT,"                                 \
    "id,"                                                                      \
    "chunk_id INTEGER,"                                                        \
    "chunk_offset INTEGER"                                                     \
    ");"

  // vec0 tables with a text primary key are still backed by int64 rowids,
  // because vec0 chunks need a fixed-length rowid. The extra
  // 'id TEXT UNIQUE NOT NULL' column emulates the text primary key interface.
  #define VEC0_SHADOW_ROWIDS_CREATE_PK_TEXT                                    \
    "CREATE TABLE " VEC0_SHADOW_ROWIDS_NAME "("                                \
    "rowid INTEGER PRIMARY KEY AUTOINCREMENT,"                                 \
    "id TEXT UNIQUE NOT NULL,"                                                 \
    "chunk_id INTEGER,"                                                        \
    "chunk_offset INTEGER"                                                     \
    ");"

  #define VEC0_SHADOW_VECTOR_N_NAME "\"%w\".\"%w_vector_chunks%02d\""
  #define VEC0_SHADOW_VECTOR_N_CREATE                                          \
    "CREATE TABLE " VEC0_SHADOW_VECTOR_N_NAME "("                              \
    "rowid PRIMARY KEY,"                                                       \
    "vectors BLOB NOT NULL"                                                    \
    ");"

  #define VEC0_SHADOW_AUXILIARY_NAME "\"%w\".\"%w_auxiliary\""
  #define VEC0_SHADOW_METADATA_N_NAME "\"%w\".\"%w_metadatachunks%02d\""
  #define VEC0_SHADOW_METADATA_TEXT_DATA_NAME "\"%w\".\"%w_metadatatext%02d\""

  // ---- error reporting ----------------------------------------------------

  // Prefix for messages that report an internal sqlite-vec failure.
  #define VEC_INTERAL_ERROR "Internal sqlite-vec error: "
  // Where users can file a bug report.
  #define REPORT_URL "https://github.com/asg017/sqlite-vec/issues/new"

  typedef struct vec0_vtab vec0_vtab;

  // ---- vec0 limits --------------------------------------------------------

  // Maximum number of columns of each kind in one vec0 table.
  #define VEC0_MAX_VECTOR_COLUMNS 16
  #define VEC0_MAX_PARTITION_COLUMNS 4
  #define VEC0_MAX_AUXILIARY_COLUMNS 16
  #define VEC0_MAX_METADATA_COLUMNS 16

  // Maximum number of dimensions of a vec0 vector column.
  #define SQLITE_VEC0_MAX_DIMENSIONS 8192

  // Sizes used by the metadata text view.
  // NOTE(review): presumably bytes per text slot and the inline prefix kept
  // inside it; confirm against the metadata code.
  #define VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH 16
  #define VEC0_METADATA_TEXT_VIEW_DATA_LENGTH 12
  /** The kind of a user-defined column in a vec0 table. */
  typedef enum {
    // Vector column, e.g. "contents_embedding float[1024]".
    SQLITE_VEC0_USER_COLUMN_KIND_VECTOR = 1,

    // Partition key column, e.g. "user_id integer partition key".
    SQLITE_VEC0_USER_COLUMN_KIND_PARTITION = 2,

    // Auxiliary column (described by Vec0AuxiliaryColumnDefinition).
    SQLITE_VEC0_USER_COLUMN_KIND_AUXILIARY = 3,

    // Metadata column that can be filtered, e.g. "genre text".
    SQLITE_VEC0_USER_COLUMN_KIND_METADATA = 4,
  } vec0_user_column_kind;
  3090. struct vec0_vtab {
  3091. sqlite3_vtab base;
  3092. // the SQLite connection of the host database
  3093. sqlite3 *db;
  3094. // True if the primary key of the vec0 table has a column type TEXT.
  3095. // Will change the schema of the _rowids table, and insert/query logic.
  3096. int pkIsText;
  3097. // number of defined vector columns.
  3098. int numVectorColumns;
  3099. // number of defined PARTITION KEY columns.
  3100. int numPartitionColumns;
  3101. // number of defined auxiliary columns
  3102. int numAuxiliaryColumns;
  3103. // number of defined metadata columns
  3104. int numMetadataColumns;
  3105. // Name of the schema the table exists on.
  3106. // Must be freed with sqlite3_free()
  3107. char *schemaName;
  3108. // Name of the table the table exists on.
  3109. // Must be freed with sqlite3_free()
  3110. char *tableName;
  3111. // Name of the _rowids shadow table.
  3112. // Must be freed with sqlite3_free()
  3113. char *shadowRowidsName;
  3114. // Name of the _chunks shadow table.
  3115. // Must be freed with sqlite3_free()
  3116. char *shadowChunksName;
  3117. // contains enum vec0_user_column_kind values for up to
  3118. // numVectorColumns + numPartitionColumns entries
  3119. vec0_user_column_kind user_column_kinds[VEC0_MAX_VECTOR_COLUMNS + VEC0_MAX_PARTITION_COLUMNS + VEC0_MAX_AUXILIARY_COLUMNS + VEC0_MAX_METADATA_COLUMNS];
  3120. uint8_t user_column_idxs[VEC0_MAX_VECTOR_COLUMNS + VEC0_MAX_PARTITION_COLUMNS + VEC0_MAX_AUXILIARY_COLUMNS + VEC0_MAX_METADATA_COLUMNS];
  3121. // Name of all the vector chunk shadow tables.
  3122. // Ex '_vector_chunks00'
  3123. // Only the first numVectorColumns entries will be available.
  3124. // The first numVectorColumns entries must be freed with sqlite3_free()
  3125. char *shadowVectorChunksNames[VEC0_MAX_VECTOR_COLUMNS];
  3126. // Name of all metadata chunk shadow tables, ie `_metadatachunks00`
  3127. // Only the first numMetadataColumns entries will be available.
  3128. // The first numMetadataColumns entries must be freed with sqlite3_free()
  3129. char *shadowMetadataChunksNames[VEC0_MAX_METADATA_COLUMNS];
  3130. struct VectorColumnDefinition vector_columns[VEC0_MAX_VECTOR_COLUMNS];
  3131. struct Vec0PartitionColumnDefinition paritition_columns[VEC0_MAX_PARTITION_COLUMNS];
  3132. struct Vec0AuxiliaryColumnDefinition auxiliary_columns[VEC0_MAX_AUXILIARY_COLUMNS];
  3133. struct Vec0MetadataColumnDefinition metadata_columns[VEC0_MAX_METADATA_COLUMNS];
  3134. int chunk_size;
  3135. // select latest chunk from _chunks, getting chunk_id
  3136. sqlite3_stmt *stmtLatestChunk;
  3137. /**
  3138. * Statement to insert a row into the _rowids table, with a rowid.
  3139. * Parameters:
  3140. * 1: int64, rowid to insert
  3141. * Result columns: none
  3142. * SQL: "INSERT INTO _rowids(rowid) VALUES (?)"
  3143. *
  3144. * Must be cleaned up with sqlite3_finalize().
  3145. */
  3146. sqlite3_stmt *stmtRowidsInsertRowid;
  3147. /**
  3148. * Statement to insert a row into the _rowids table, with an id.
  3149. * The id column isn't a tradition primary key, but instead a unique
  3150. * column to handle "text primary key" vec0 tables. The true int64 rowid
  3151. * can be retrieved after inserting with sqlite3_last_rowid().
  3152. *
  3153. * Parameters:
  3154. * 1: text or null, id to insert
  3155. * Result columns: none
  3156. *
  3157. * Must be cleaned up with sqlite3_finalize().
  3158. */
  3159. sqlite3_stmt *stmtRowidsInsertId;
  3160. /**
  3161. * Statement to update the "position" columns chunk_id and chunk_offset for
  3162. * a given _rowids row. Used when the "next available" chunk position is found
  3163. * for a vector.
  3164. *
  3165. * Parameters:
  3166. * 1: int64, chunk_id value
  3167. * 2: int64, chunk_offset value
  3168. * 3: int64, rowid value
  3169. * Result columns: none
  3170. *
  3171. * Must be cleaned up with sqlite3_finalize().
  3172. */
  3173. sqlite3_stmt *stmtRowidsUpdatePosition;
  3174. /**
  3175. * Statement to quickly find the chunk_id + chunk_offset of a given row.
  3176. * Parameters:
  3177. * 1: rowid of the row/vector to lookup
  3178. * Result columns:
  3179. * 0: chunk_id (i64)
  3180. * 1: chunk_offset (i64)
  3181. * SQL: "SELECT id, chunk_id, chunk_offset FROM _rowids WHERE rowid = ?""
  3182. *
  3183. * Must be cleaned up with sqlite3_finalize().
  3184. */
  3185. sqlite3_stmt *stmtRowidsGetChunkPosition;
  3186. };
  3187. /**
  3188. * @brief Finalize all the sqlite3_stmt members in a vec0_vtab.
  3189. *
  3190. * @param p vec0_vtab pointer
  3191. */
  3192. void vec0_free_resources(vec0_vtab *p) {
  3193. sqlite3_finalize(p->stmtLatestChunk);
  3194. p->stmtLatestChunk = NULL;
  3195. sqlite3_finalize(p->stmtRowidsInsertRowid);
  3196. p->stmtRowidsInsertRowid = NULL;
  3197. sqlite3_finalize(p->stmtRowidsInsertId);
  3198. p->stmtRowidsInsertId = NULL;
  3199. sqlite3_finalize(p->stmtRowidsUpdatePosition);
  3200. p->stmtRowidsUpdatePosition = NULL;
  3201. sqlite3_finalize(p->stmtRowidsGetChunkPosition);
  3202. p->stmtRowidsGetChunkPosition = NULL;
  3203. }
  3204. /**
  3205. * @brief Free all memory and sqlite3_stmt members of a vec0_vtab
  3206. *
  3207. * @param p vec0_vtab pointer
  3208. */
  3209. void vec0_free(vec0_vtab *p) {
  3210. vec0_free_resources(p);
  3211. sqlite3_free(p->schemaName);
  3212. p->schemaName = NULL;
  3213. sqlite3_free(p->tableName);
  3214. p->tableName = NULL;
  3215. sqlite3_free(p->shadowChunksName);
  3216. p->shadowChunksName = NULL;
  3217. sqlite3_free(p->shadowRowidsName);
  3218. p->shadowRowidsName = NULL;
  3219. for (int i = 0; i < p->numVectorColumns; i++) {
  3220. sqlite3_free(p->shadowVectorChunksNames[i]);
  3221. p->shadowVectorChunksNames[i] = NULL;
  3222. sqlite3_free(p->vector_columns[i].name);
  3223. p->vector_columns[i].name = NULL;
  3224. }
  3225. }
  3226. int vec0_num_defined_user_columns(vec0_vtab *p) {
  3227. return p->numVectorColumns + p->numPartitionColumns + p->numAuxiliaryColumns + p->numMetadataColumns;
  3228. }
  3229. /**
  3230. * @brief Returns the index of the distance hidden column for the given vec0
  3231. * table.
  3232. *
  3233. * @param p vec0 table
  3234. * @return int
  3235. */
  3236. int vec0_column_distance_idx(vec0_vtab *p) {
  3237. return VEC0_COLUMN_USERN_START + (vec0_num_defined_user_columns(p) - 1) +
  3238. VEC0_COLUMN_OFFSET_DISTANCE;
  3239. }
  3240. /**
  3241. * @brief Returns the index of the k hidden column for the given vec0 table.
  3242. *
  3243. * @param p vec0 table
  3244. * @return int k column index
  3245. */
  3246. int vec0_column_k_idx(vec0_vtab *p) {
  3247. return VEC0_COLUMN_USERN_START + (vec0_num_defined_user_columns(p) - 1) +
  3248. VEC0_COLUMN_OFFSET_K;
  3249. }
  3250. /**
  3251. * Returns 1 if the given column-based index is a valid vector column,
  3252. * 0 otherwise.
  3253. */
  3254. int vec0_column_idx_is_vector(vec0_vtab *pVtab, int column_idx) {
  3255. return column_idx >= VEC0_COLUMN_USERN_START &&
  3256. column_idx <= (VEC0_COLUMN_USERN_START + vec0_num_defined_user_columns(pVtab) - 1) &&
  3257. pVtab->user_column_kinds[column_idx - VEC0_COLUMN_USERN_START] == SQLITE_VEC0_USER_COLUMN_KIND_VECTOR;
  3258. }
  3259. /**
  3260. * Returns the vector index of the given user column index.
  3261. * ONLY call if validated with vec0_column_idx_is_vector before
  3262. */
  3263. int vec0_column_idx_to_vector_idx(vec0_vtab *pVtab, int column_idx) {
  3264. UNUSED_PARAMETER(pVtab);
  3265. return pVtab->user_column_idxs[column_idx - VEC0_COLUMN_USERN_START];
  3266. }
  3267. /**
  3268. * Returns 1 if the given column-based index is a "partition key" column,
  3269. * 0 otherwise.
  3270. */
  3271. int vec0_column_idx_is_partition(vec0_vtab *pVtab, int column_idx) {
  3272. return column_idx >= VEC0_COLUMN_USERN_START &&
  3273. column_idx <= (VEC0_COLUMN_USERN_START + vec0_num_defined_user_columns(pVtab) - 1) &&
  3274. pVtab->user_column_kinds[column_idx - VEC0_COLUMN_USERN_START] == SQLITE_VEC0_USER_COLUMN_KIND_PARTITION;
  3275. }
  3276. /**
  3277. * Returns the partition column index of the given user column index.
  3278. * ONLY call if validated with vec0_column_idx_is_vector before
  3279. */
  3280. int vec0_column_idx_to_partition_idx(vec0_vtab *pVtab, int column_idx) {
  3281. UNUSED_PARAMETER(pVtab);
  3282. return pVtab->user_column_idxs[column_idx - VEC0_COLUMN_USERN_START];
  3283. }
  3284. /**
  3285. * Returns 1 if the given column-based index is a auxiliary column,
  3286. * 0 otherwise.
  3287. */
  3288. int vec0_column_idx_is_auxiliary(vec0_vtab *pVtab, int column_idx) {
  3289. return column_idx >= VEC0_COLUMN_USERN_START &&
  3290. column_idx <= (VEC0_COLUMN_USERN_START + vec0_num_defined_user_columns(pVtab) - 1) &&
  3291. pVtab->user_column_kinds[column_idx - VEC0_COLUMN_USERN_START] == SQLITE_VEC0_USER_COLUMN_KIND_AUXILIARY;
  3292. }
  3293. /**
  3294. * Returns the auxiliary column index of the given user column index.
  3295. * ONLY call if validated with vec0_column_idx_to_partition_idx before
  3296. */
  3297. int vec0_column_idx_to_auxiliary_idx(vec0_vtab *pVtab, int column_idx) {
  3298. UNUSED_PARAMETER(pVtab);
  3299. return pVtab->user_column_idxs[column_idx - VEC0_COLUMN_USERN_START];
  3300. }
  3301. /**
  3302. * Returns 1 if the given column-based index is a metadata column,
  3303. * 0 otherwise.
  3304. */
  3305. int vec0_column_idx_is_metadata(vec0_vtab *pVtab, int column_idx) {
  3306. return column_idx >= VEC0_COLUMN_USERN_START &&
  3307. column_idx <= (VEC0_COLUMN_USERN_START + vec0_num_defined_user_columns(pVtab) - 1) &&
  3308. pVtab->user_column_kinds[column_idx - VEC0_COLUMN_USERN_START] == SQLITE_VEC0_USER_COLUMN_KIND_METADATA;
  3309. }
  3310. /**
  3311. * Returns the metadata column index of the given user column index.
  3312. * ONLY call if validated with vec0_column_idx_is_metadata before
  3313. */
  3314. int vec0_column_idx_to_metadata_idx(vec0_vtab *pVtab, int column_idx) {
  3315. UNUSED_PARAMETER(pVtab);
  3316. return pVtab->user_column_idxs[column_idx - VEC0_COLUMN_USERN_START];
  3317. }
  3318. /**
  3319. * @brief Retrieve the chunk_id, chunk_offset, and possible "id" value
  3320. * of a vec0_vtab row with the provided rowid
  3321. *
  3322. * @param p vec0_vtab
  3323. * @param rowid the rowid of the row to query
  3324. * @param id output, optional sqlite3_value to provide the id.
  3325. * Useful for text PK rows. Must be freed with sqlite3_value_free()
  3326. * @param chunk_id output, the chunk_id the row belongs to
  3327. * @param chunk_offset output, the offset within the chunk the row belongs to
  3328. * @return SQLITE_ROW on success, error code otherwise. SQLITE_EMPTY if row DNE
  3329. */
  3330. int vec0_get_chunk_position(vec0_vtab *p, i64 rowid, sqlite3_value **id,
  3331. i64 *chunk_id, i64 *chunk_offset) {
  3332. int rc;
  3333. if (!p->stmtRowidsGetChunkPosition) {
  3334. const char *zSql =
  3335. sqlite3_mprintf("SELECT id, chunk_id, chunk_offset "
  3336. "FROM " VEC0_SHADOW_ROWIDS_NAME " WHERE rowid = ?",
  3337. p->schemaName, p->tableName);
  3338. if (!zSql) {
  3339. rc = SQLITE_NOMEM;
  3340. goto cleanup;
  3341. }
  3342. rc = sqlite3_prepare_v2(p->db, zSql, -1, &p->stmtRowidsGetChunkPosition, 0);
  3343. sqlite3_free((void *)zSql);
  3344. if (rc != SQLITE_OK) {
  3345. vtab_set_error(
  3346. &p->base, VEC_INTERAL_ERROR
  3347. "could not initialize 'rowids get chunk position' statement");
  3348. goto cleanup;
  3349. }
  3350. }
  3351. sqlite3_bind_int64(p->stmtRowidsGetChunkPosition, 1, rowid);
  3352. rc = sqlite3_step(p->stmtRowidsGetChunkPosition);
  3353. // special case: when no results, return SQLITE_EMPTY to convey "that chunk
  3354. // position doesnt exist"
  3355. if (rc == SQLITE_DONE) {
  3356. rc = SQLITE_EMPTY;
  3357. goto cleanup;
  3358. }
  3359. if (rc != SQLITE_ROW) {
  3360. goto cleanup;
  3361. }
  3362. if (id) {
  3363. sqlite3_value *value =
  3364. sqlite3_column_value(p->stmtRowidsGetChunkPosition, 0);
  3365. *id = sqlite3_value_dup(value);
  3366. if (!*id) {
  3367. rc = SQLITE_NOMEM;
  3368. goto cleanup;
  3369. }
  3370. }
  3371. if (chunk_id) {
  3372. *chunk_id = sqlite3_column_int64(p->stmtRowidsGetChunkPosition, 1);
  3373. }
  3374. if (chunk_offset) {
  3375. *chunk_offset = sqlite3_column_int64(p->stmtRowidsGetChunkPosition, 2);
  3376. }
  3377. rc = SQLITE_OK;
  3378. cleanup:
  3379. sqlite3_reset(p->stmtRowidsGetChunkPosition);
  3380. sqlite3_clear_bindings(p->stmtRowidsGetChunkPosition);
  3381. return rc;
  3382. }
  3383. /**
  3384. * @brief Return the id value from the _rowids table where _rowids.rowid =
  3385. * rowid.
  3386. *
  3387. * @param pVtab: vec0 table to query
  3388. * @param rowid: rowid of the row to query.
  3389. * @param out: A dup'ed sqlite3_value of the id column. Might be null.
  3390. * Must be cleaned up with sqlite3_value_free().
  3391. * @returns SQLITE_OK on success, error code on failure
  3392. */
  3393. int vec0_get_id_value_from_rowid(vec0_vtab *pVtab, i64 rowid,
  3394. sqlite3_value **out) {
  3395. // PERF: different strategy than get_chunk_position?
  3396. return vec0_get_chunk_position((vec0_vtab *)pVtab, rowid, out, NULL, NULL);
  3397. }
  3398. int vec0_rowid_from_id(vec0_vtab *p, sqlite3_value *valueId, i64 *rowid) {
  3399. sqlite3_stmt *stmt = NULL;
  3400. int rc;
  3401. char *zSql;
  3402. zSql = sqlite3_mprintf("SELECT rowid"
  3403. " FROM " VEC0_SHADOW_ROWIDS_NAME " WHERE id = ?",
  3404. p->schemaName, p->tableName);
  3405. if (!zSql) {
  3406. rc = SQLITE_NOMEM;
  3407. goto cleanup;
  3408. }
  3409. rc = sqlite3_prepare_v2(p->db, zSql, -1, &stmt, NULL);
  3410. sqlite3_free(zSql);
  3411. if (rc != SQLITE_OK) {
  3412. goto cleanup;
  3413. }
  3414. sqlite3_bind_value(stmt, 1, valueId);
  3415. rc = sqlite3_step(stmt);
  3416. if (rc == SQLITE_DONE) {
  3417. rc = SQLITE_EMPTY;
  3418. goto cleanup;
  3419. }
  3420. if (rc != SQLITE_ROW) {
  3421. goto cleanup;
  3422. }
  3423. *rowid = sqlite3_column_int64(stmt, 0);
  3424. rc = sqlite3_step(stmt);
  3425. if (rc != SQLITE_DONE) {
  3426. goto cleanup;
  3427. }
  3428. rc = SQLITE_OK;
  3429. cleanup:
  3430. sqlite3_finalize(stmt);
  3431. return rc;
  3432. }
  3433. int vec0_result_id(vec0_vtab *p, sqlite3_context *context, i64 rowid) {
  3434. if (!p->pkIsText) {
  3435. sqlite3_result_int64(context, rowid);
  3436. return SQLITE_OK;
  3437. }
  3438. sqlite3_value *valueId;
  3439. int rc = vec0_get_id_value_from_rowid(p, rowid, &valueId);
  3440. if (rc != SQLITE_OK) {
  3441. return rc;
  3442. }
  3443. if (!valueId) {
  3444. sqlite3_result_error_nomem(context);
  3445. } else {
  3446. sqlite3_result_value(context, valueId);
  3447. sqlite3_value_free(valueId);
  3448. }
  3449. return SQLITE_OK;
  3450. }
  3451. /**
  3452. * @brief
  3453. *
  3454. * @param pVtab: virtual table to query
  3455. * @param rowid: row to lookup
  3456. * @param vector_column_idx: which vector column to query
  3457. * @param outVector: Output pointer to the vector buffer.
  3458. * Must be sqlite3_free()'ed.
  3459. * @param outVectorSize: Pointer to a int where the size of outVector
  3460. * will be stored.
  3461. * @return int SQLITE_OK on success.
  3462. */
  3463. int vec0_get_vector_data(vec0_vtab *pVtab, i64 rowid, int vector_column_idx,
  3464. void **outVector, int *outVectorSize) {
  3465. vec0_vtab *p = pVtab;
  3466. int rc, brc;
  3467. i64 chunk_id;
  3468. i64 chunk_offset;
  3469. size_t size;
  3470. void *buf = NULL;
  3471. int blobOffset;
  3472. sqlite3_blob *vectorBlob = NULL;
  3473. assert((vector_column_idx >= 0) &&
  3474. (vector_column_idx < pVtab->numVectorColumns));
  3475. rc = vec0_get_chunk_position(pVtab, rowid, NULL, &chunk_id, &chunk_offset);
  3476. if (rc == SQLITE_EMPTY) {
  3477. vtab_set_error(&pVtab->base, "Could not find a row with rowid %lld", rowid);
  3478. goto cleanup;
  3479. }
  3480. if (rc != SQLITE_OK) {
  3481. goto cleanup;
  3482. }
  3483. rc = sqlite3_blob_open(p->db, p->schemaName,
  3484. p->shadowVectorChunksNames[vector_column_idx],
  3485. "vectors", chunk_id, 0, &vectorBlob);
  3486. if (rc != SQLITE_OK) {
  3487. vtab_set_error(&pVtab->base,
  3488. "Could not fetch vector data for %lld, opening blob failed",
  3489. rowid);
  3490. rc = SQLITE_ERROR;
  3491. goto cleanup;
  3492. }
  3493. size = vector_column_byte_size(pVtab->vector_columns[vector_column_idx]);
  3494. blobOffset = chunk_offset * size;
  3495. buf = sqlite3_malloc(size);
  3496. if (!buf) {
  3497. rc = SQLITE_NOMEM;
  3498. goto cleanup;
  3499. }
  3500. rc = sqlite3_blob_read(vectorBlob, buf, size, blobOffset);
  3501. if (rc != SQLITE_OK) {
  3502. sqlite3_free(buf);
  3503. buf = NULL;
  3504. vtab_set_error(
  3505. &pVtab->base,
  3506. "Could not fetch vector data for %lld, reading from blob failed",
  3507. rowid);
  3508. rc = SQLITE_ERROR;
  3509. goto cleanup;
  3510. }
  3511. *outVector = buf;
  3512. if (outVectorSize) {
  3513. *outVectorSize = size;
  3514. }
  3515. rc = SQLITE_OK;
  3516. cleanup:
  3517. brc = sqlite3_blob_close(vectorBlob);
  3518. if ((rc == SQLITE_OK) && (brc != SQLITE_OK)) {
  3519. vtab_set_error(
  3520. &p->base, VEC_INTERAL_ERROR
  3521. "unknown error, could not close vector blob, please file an issue");
  3522. return brc;
  3523. }
  3524. return rc;
  3525. }
  3526. /**
  3527. * @brief Retrieve the sqlite3_value of the i'th partition value for the given row.
  3528. *
  3529. * @param pVtab - the vec0_vtab in questions
  3530. * @param rowid - rowid of target row
  3531. * @param partition_idx - which partition column to retrieve
  3532. * @param outValue - output sqlite3_value
  3533. * @return int - SQLITE_OK on success, otherwise error code
  3534. */
  3535. int vec0_get_partition_value_for_rowid(vec0_vtab *pVtab, i64 rowid, int partition_idx, sqlite3_value ** outValue) {
  3536. int rc;
  3537. i64 chunk_id;
  3538. i64 chunk_offset;
  3539. rc = vec0_get_chunk_position(pVtab, rowid, NULL, &chunk_id, &chunk_offset);
  3540. if(rc != SQLITE_OK) {
  3541. return rc;
  3542. }
  3543. sqlite3_stmt * stmt = NULL;
  3544. char * zSql = sqlite3_mprintf("SELECT partition%02d FROM " VEC0_SHADOW_CHUNKS_NAME " WHERE chunk_id = ?", partition_idx, pVtab->schemaName, pVtab->tableName);
  3545. if(!zSql) {
  3546. return SQLITE_NOMEM;
  3547. }
  3548. rc = sqlite3_prepare_v2(pVtab->db, zSql, -1, &stmt, NULL);
  3549. sqlite3_free(zSql);
  3550. if(rc != SQLITE_OK) {
  3551. return rc;
  3552. }
  3553. sqlite3_bind_int64(stmt, 1, chunk_id);
  3554. rc = sqlite3_step(stmt);
  3555. if(rc != SQLITE_ROW) {
  3556. rc = SQLITE_ERROR;
  3557. goto done;
  3558. }
  3559. *outValue = sqlite3_value_dup(sqlite3_column_value(stmt, 0));
  3560. if(!*outValue) {
  3561. rc = SQLITE_NOMEM;
  3562. goto done;
  3563. }
  3564. rc = SQLITE_OK;
  3565. done:
  3566. sqlite3_finalize(stmt);
  3567. return rc;
  3568. }
  3569. /**
  3570. * @brief Get the value of an auxiliary column for the given rowid
  3571. *
  3572. * @param pVtab vec0_vtab
  3573. * @param rowid the rowid of the row to lookup
  3574. * @param auxiliary_idx aux index of the column we care about
  3575. * @param outValue Output sqlite3_value to store
  3576. * @return int SQLITE_OK on success, error code otherwise
  3577. */
  3578. int vec0_get_auxiliary_value_for_rowid(vec0_vtab *pVtab, i64 rowid, int auxiliary_idx, sqlite3_value ** outValue) {
  3579. int rc;
  3580. sqlite3_stmt * stmt = NULL;
  3581. char * zSql = sqlite3_mprintf("SELECT value%02d FROM " VEC0_SHADOW_AUXILIARY_NAME " WHERE rowid = ?", auxiliary_idx, pVtab->schemaName, pVtab->tableName);
  3582. if(!zSql) {
  3583. return SQLITE_NOMEM;
  3584. }
  3585. rc = sqlite3_prepare_v2(pVtab->db, zSql, -1, &stmt, NULL);
  3586. sqlite3_free(zSql);
  3587. if(rc != SQLITE_OK) {
  3588. return rc;
  3589. }
  3590. sqlite3_bind_int64(stmt, 1, rowid);
  3591. rc = sqlite3_step(stmt);
  3592. if(rc != SQLITE_ROW) {
  3593. rc = SQLITE_ERROR;
  3594. goto done;
  3595. }
  3596. *outValue = sqlite3_value_dup(sqlite3_column_value(stmt, 0));
  3597. if(!*outValue) {
  3598. rc = SQLITE_NOMEM;
  3599. goto done;
  3600. }
  3601. rc = SQLITE_OK;
  3602. done:
  3603. sqlite3_finalize(stmt);
  3604. return rc;
  3605. }
  3606. /**
  3607. * @brief Result the given metadata value for the given row and metadata column index.
  3608. * Will traverse the metadatachunksNN table with BLOB I/0 for the given rowid.
  3609. *
  3610. * @param p
  3611. * @param rowid
  3612. * @param metadata_idx
  3613. * @param context
  3614. * @return int
  3615. */
  3616. int vec0_result_metadata_value_for_rowid(vec0_vtab *p, i64 rowid, int metadata_idx, sqlite3_context * context) {
  3617. int rc;
  3618. i64 chunk_id;
  3619. i64 chunk_offset;
  3620. rc = vec0_get_chunk_position(p, rowid, NULL, &chunk_id, &chunk_offset);
  3621. if(rc != SQLITE_OK) {
  3622. return rc;
  3623. }
  3624. sqlite3_blob * blobValue;
  3625. rc = sqlite3_blob_open(p->db, p->schemaName, p->shadowMetadataChunksNames[metadata_idx], "data", chunk_id, 0, &blobValue);
  3626. if(rc != SQLITE_OK) {
  3627. return rc;
  3628. }
  3629. switch(p->metadata_columns[metadata_idx].kind) {
  3630. case VEC0_METADATA_COLUMN_KIND_BOOLEAN: {
  3631. u8 block;
  3632. rc = sqlite3_blob_read(blobValue, &block, sizeof(block), chunk_offset / CHAR_BIT);
  3633. if(rc != SQLITE_OK) {
  3634. goto done;
  3635. }
  3636. int value = block >> ((chunk_offset % CHAR_BIT)) & 1;
  3637. sqlite3_result_int(context, value);
  3638. break;
  3639. }
  3640. case VEC0_METADATA_COLUMN_KIND_INTEGER: {
  3641. i64 value;
  3642. rc = sqlite3_blob_read(blobValue, &value, sizeof(value), chunk_offset * sizeof(i64));
  3643. if(rc != SQLITE_OK) {
  3644. goto done;
  3645. }
  3646. sqlite3_result_int64(context, value);
  3647. break;
  3648. }
  3649. case VEC0_METADATA_COLUMN_KIND_FLOAT: {
  3650. double value;
  3651. rc = sqlite3_blob_read(blobValue, &value, sizeof(value), chunk_offset * sizeof(double));
  3652. if(rc != SQLITE_OK) {
  3653. goto done;
  3654. }
  3655. sqlite3_result_double(context, value);
  3656. break;
  3657. }
  3658. case VEC0_METADATA_COLUMN_KIND_TEXT: {
  3659. u8 view[VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH];
  3660. rc = sqlite3_blob_read(blobValue, &view, VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH, chunk_offset * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH);
  3661. if(rc != SQLITE_OK) {
  3662. goto done;
  3663. }
  3664. int length = ((int *)view)[0];
  3665. if(length <= VEC0_METADATA_TEXT_VIEW_DATA_LENGTH) {
  3666. sqlite3_result_text(context, (const char*) (view + 4), length, SQLITE_TRANSIENT);
  3667. }
  3668. else {
  3669. sqlite3_stmt * stmt;
  3670. const char * zSql = sqlite3_mprintf("SELECT data FROM " VEC0_SHADOW_METADATA_TEXT_DATA_NAME " WHERE rowid = ?", p->schemaName, p->tableName, metadata_idx);
  3671. if(!zSql) {
  3672. rc = SQLITE_ERROR;
  3673. goto done;
  3674. }
  3675. rc = sqlite3_prepare_v2(p->db, zSql, -1, &stmt, NULL);
  3676. sqlite3_free((void *) zSql);
  3677. if(rc != SQLITE_OK) {
  3678. goto done;
  3679. }
  3680. sqlite3_bind_int64(stmt, 1, rowid);
  3681. rc = sqlite3_step(stmt);
  3682. if(rc != SQLITE_ROW) {
  3683. sqlite3_finalize(stmt);
  3684. rc = SQLITE_ERROR;
  3685. goto done;
  3686. }
  3687. sqlite3_result_value(context, sqlite3_column_value(stmt, 0));
  3688. sqlite3_finalize(stmt);
  3689. rc = SQLITE_OK;
  3690. }
  3691. break;
  3692. }
  3693. }
  3694. done:
  3695. // blobValue is read-only, will not fail on close
  3696. sqlite3_blob_close(blobValue);
  3697. return rc;
  3698. }
  3699. int vec0_get_latest_chunk_rowid(vec0_vtab *p, i64 *chunk_rowid, sqlite3_value ** partitionKeyValues) {
  3700. int rc;
  3701. const char *zSql;
  3702. // lazy initialize stmtLatestChunk when needed. May be cleared during xSync()
  3703. if (!p->stmtLatestChunk) {
  3704. if(p->numPartitionColumns > 0) {
  3705. sqlite3_str * s = sqlite3_str_new(NULL);
  3706. sqlite3_str_appendf(s, "SELECT max(rowid) FROM " VEC0_SHADOW_CHUNKS_NAME " WHERE ",
  3707. p->schemaName, p->tableName);
  3708. for(int i = 0; i < p->numPartitionColumns; i++) {
  3709. if(i != 0) {
  3710. sqlite3_str_appendall(s, " AND ");
  3711. }
  3712. sqlite3_str_appendf(s, " partition%02d = ? ", i);
  3713. }
  3714. zSql = sqlite3_str_finish(s);
  3715. }else {
  3716. zSql = sqlite3_mprintf("SELECT max(rowid) FROM " VEC0_SHADOW_CHUNKS_NAME,
  3717. p->schemaName, p->tableName);
  3718. }
  3719. if (!zSql) {
  3720. rc = SQLITE_NOMEM;
  3721. goto cleanup;
  3722. }
  3723. rc = sqlite3_prepare_v2(p->db, zSql, -1, &p->stmtLatestChunk, 0);
  3724. sqlite3_free((void *)zSql);
  3725. if (rc != SQLITE_OK) {
  3726. // IMP: V21406_05476
  3727. vtab_set_error(&p->base, VEC_INTERAL_ERROR
  3728. "could not initialize 'latest chunk' statement");
  3729. goto cleanup;
  3730. }
  3731. }
  3732. for(int i = 0; i < p->numPartitionColumns; i++) {
  3733. sqlite3_bind_value(p->stmtLatestChunk, i+1, (partitionKeyValues[i]));
  3734. }
  3735. rc = sqlite3_step(p->stmtLatestChunk);
  3736. if (rc != SQLITE_ROW) {
  3737. // IMP: V31559_15629
  3738. vtab_set_error(&p->base, VEC_INTERAL_ERROR "Could not find latest chunk");
  3739. rc = SQLITE_ERROR;
  3740. goto cleanup;
  3741. }
  3742. if(sqlite3_column_type(p->stmtLatestChunk, 0) == SQLITE_NULL){
  3743. rc = SQLITE_EMPTY;
  3744. goto cleanup;
  3745. }
  3746. *chunk_rowid = sqlite3_column_int64(p->stmtLatestChunk, 0);
  3747. rc = sqlite3_step(p->stmtLatestChunk);
  3748. if (rc != SQLITE_DONE) {
  3749. vtab_set_error(&p->base,
  3750. VEC_INTERAL_ERROR
  3751. "unknown result code when closing out stmtLatestChunk. "
  3752. "Please file an issue: " REPORT_URL,
  3753. p->schemaName, p->shadowChunksName);
  3754. goto cleanup;
  3755. }
  3756. rc = SQLITE_OK;
  3757. cleanup:
  3758. if (p->stmtLatestChunk) {
  3759. sqlite3_reset(p->stmtLatestChunk);
  3760. sqlite3_clear_bindings(p->stmtLatestChunk);
  3761. }
  3762. return rc;
  3763. }
  3764. int vec0_rowids_insert_rowid(vec0_vtab *p, i64 rowid) {
  3765. int rc = SQLITE_OK;
  3766. int entered = 0;
  3767. UNUSED_PARAMETER(entered); // temporary
  3768. if (!p->stmtRowidsInsertRowid) {
  3769. const char *zSql =
  3770. sqlite3_mprintf("INSERT INTO " VEC0_SHADOW_ROWIDS_NAME "(rowid)"
  3771. "VALUES (?);",
  3772. p->schemaName, p->tableName);
  3773. if (!zSql) {
  3774. rc = SQLITE_NOMEM;
  3775. goto cleanup;
  3776. }
  3777. rc = sqlite3_prepare_v2(p->db, zSql, -1, &p->stmtRowidsInsertRowid, 0);
  3778. sqlite3_free((void *)zSql);
  3779. if (rc != SQLITE_OK) {
  3780. vtab_set_error(&p->base, VEC_INTERAL_ERROR
  3781. "could not initialize 'insert rowids' statement");
  3782. goto cleanup;
  3783. }
  3784. }
  3785. #if SQLITE_THREADSAFE
  3786. if (sqlite3_mutex_enter) {
  3787. sqlite3_mutex_enter(sqlite3_db_mutex(p->db));
  3788. entered = 1;
  3789. }
  3790. #endif
  3791. sqlite3_bind_int64(p->stmtRowidsInsertRowid, 1, rowid);
  3792. rc = sqlite3_step(p->stmtRowidsInsertRowid);
  3793. if (rc != SQLITE_DONE) {
  3794. if (sqlite3_extended_errcode(p->db) == SQLITE_CONSTRAINT_PRIMARYKEY) {
  3795. // IMP: V17090_01160
  3796. vtab_set_error(&p->base, "UNIQUE constraint failed on %s primary key",
  3797. p->tableName);
  3798. } else {
  3799. // IMP: V04679_21517
  3800. vtab_set_error(&p->base,
  3801. "Error inserting rowid into rowids shadow table: %s",
  3802. sqlite3_errmsg(sqlite3_db_handle(p->stmtRowidsInsertId)));
  3803. }
  3804. rc = SQLITE_ERROR;
  3805. goto cleanup;
  3806. }
  3807. rc = SQLITE_OK;
  3808. cleanup:
  3809. if (p->stmtRowidsInsertRowid) {
  3810. sqlite3_reset(p->stmtRowidsInsertRowid);
  3811. sqlite3_clear_bindings(p->stmtRowidsInsertRowid);
  3812. }
  3813. #if SQLITE_THREADSAFE
  3814. if (sqlite3_mutex_leave && entered) {
  3815. sqlite3_mutex_leave(sqlite3_db_mutex(p->db));
  3816. }
  3817. #endif
  3818. return rc;
  3819. }
  3820. int vec0_rowids_insert_id(vec0_vtab *p, sqlite3_value *idValue, i64 *rowid) {
  3821. int rc = SQLITE_OK;
  3822. int entered = 0;
  3823. UNUSED_PARAMETER(entered); // temporary
  3824. if (!p->stmtRowidsInsertId) {
  3825. const char *zSql =
  3826. sqlite3_mprintf("INSERT INTO " VEC0_SHADOW_ROWIDS_NAME "(id)"
  3827. "VALUES (?);",
  3828. p->schemaName, p->tableName);
  3829. if (!zSql) {
  3830. rc = SQLITE_NOMEM;
  3831. goto complete;
  3832. }
  3833. rc = sqlite3_prepare_v2(p->db, zSql, -1, &p->stmtRowidsInsertId, 0);
  3834. sqlite3_free((void *)zSql);
  3835. if (rc != SQLITE_OK) {
  3836. vtab_set_error(&p->base, VEC_INTERAL_ERROR
  3837. "could not initialize 'insert rowids id' statement");
  3838. goto complete;
  3839. }
  3840. }
  3841. #if SQLITE_THREADSAFE
  3842. if (sqlite3_mutex_enter) {
  3843. sqlite3_mutex_enter(sqlite3_db_mutex(p->db));
  3844. entered = 1;
  3845. }
  3846. #endif
  3847. if (idValue) {
  3848. sqlite3_bind_value(p->stmtRowidsInsertId, 1, idValue);
  3849. }
  3850. rc = sqlite3_step(p->stmtRowidsInsertId);
  3851. if (rc != SQLITE_DONE) {
  3852. if (sqlite3_extended_errcode(p->db) == SQLITE_CONSTRAINT_UNIQUE) {
  3853. // IMP: V20497_04568
  3854. vtab_set_error(&p->base, "UNIQUE constraint failed on %s primary key",
  3855. p->tableName);
  3856. } else {
  3857. // IMP: V24016_08086
  3858. // IMP: V15177_32015
  3859. vtab_set_error(&p->base,
  3860. "Error inserting id into rowids shadow table: %s",
  3861. sqlite3_errmsg(sqlite3_db_handle(p->stmtRowidsInsertId)));
  3862. }
  3863. rc = SQLITE_ERROR;
  3864. goto complete;
  3865. }
  3866. *rowid = sqlite3_last_insert_rowid(p->db);
  3867. rc = SQLITE_OK;
  3868. complete:
  3869. if (p->stmtRowidsInsertId) {
  3870. sqlite3_reset(p->stmtRowidsInsertId);
  3871. sqlite3_clear_bindings(p->stmtRowidsInsertId);
  3872. }
  3873. #if SQLITE_THREADSAFE
  3874. if (sqlite3_mutex_leave && entered) {
  3875. sqlite3_mutex_leave(sqlite3_db_mutex(p->db));
  3876. }
  3877. #endif
  3878. return rc;
  3879. }
  3880. int vec0_metadata_chunk_size(vec0_metadata_column_kind kind, int chunk_size) {
  3881. switch(kind) {
  3882. case VEC0_METADATA_COLUMN_KIND_BOOLEAN:
  3883. return chunk_size / 8;
  3884. case VEC0_METADATA_COLUMN_KIND_INTEGER:
  3885. return chunk_size * sizeof(i64);
  3886. case VEC0_METADATA_COLUMN_KIND_FLOAT:
  3887. return chunk_size * sizeof(double);
  3888. case VEC0_METADATA_COLUMN_KIND_TEXT:
  3889. return chunk_size * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH;
  3890. }
  3891. return 0;
  3892. }
  3893. int vec0_rowids_update_position(vec0_vtab *p, i64 rowid, i64 chunk_rowid,
  3894. i64 chunk_offset) {
  3895. int rc = SQLITE_OK;
  3896. if (!p->stmtRowidsUpdatePosition) {
  3897. const char *zSql = sqlite3_mprintf(" UPDATE " VEC0_SHADOW_ROWIDS_NAME
  3898. " SET chunk_id = ?, chunk_offset = ?"
  3899. " WHERE rowid = ?",
  3900. p->schemaName, p->tableName);
  3901. if (!zSql) {
  3902. rc = SQLITE_NOMEM;
  3903. goto cleanup;
  3904. }
  3905. rc = sqlite3_prepare_v2(p->db, zSql, -1, &p->stmtRowidsUpdatePosition, 0);
  3906. sqlite3_free((void *)zSql);
  3907. if (rc != SQLITE_OK) {
  3908. vtab_set_error(&p->base, VEC_INTERAL_ERROR
  3909. "could not initialize 'update rowids position' statement");
  3910. goto cleanup;
  3911. }
  3912. }
  3913. sqlite3_bind_int64(p->stmtRowidsUpdatePosition, 1, chunk_rowid);
  3914. sqlite3_bind_int64(p->stmtRowidsUpdatePosition, 2, chunk_offset);
  3915. sqlite3_bind_int64(p->stmtRowidsUpdatePosition, 3, rowid);
  3916. rc = sqlite3_step(p->stmtRowidsUpdatePosition);
  3917. if (rc != SQLITE_DONE) {
  3918. // IMP: V21925_05995
  3919. vtab_set_error(&p->base,
  3920. VEC_INTERAL_ERROR
  3921. "could not update rowids position for rowid=%lld, "
  3922. "chunk_rowid=%lld, chunk_offset=%lld",
  3923. rowid, chunk_rowid, chunk_offset);
  3924. rc = SQLITE_ERROR;
  3925. goto cleanup;
  3926. }
  3927. rc = SQLITE_OK;
  3928. cleanup:
  3929. if (p->stmtRowidsUpdatePosition) {
  3930. sqlite3_reset(p->stmtRowidsUpdatePosition);
  3931. sqlite3_clear_bindings(p->stmtRowidsUpdatePosition);
  3932. }
  3933. return rc;
  3934. }
  3935. /**
  3936. * @brief Adds a new chunk for the vec0 table, and the corresponding vector
  3937. * chunks.
  3938. *
  3939. * Inserts a new row into the _chunks table, with blank data, and uses that new
  3940. * rowid to insert new blank rows into _vector_chunksXX tables.
  3941. *
  3942. * @param p: vec0 table to add new chunk
  3943. * @param paritionKeyValues: Array of partition key valeus for the new chunk, if available
  3944. * @param chunk_rowid: Output pointer, if not NULL, then will be filled with the
  3945. * new chunk rowid.
  3946. * @return int SQLITE_OK on success, error code otherwise.
  3947. */
  3948. int vec0_new_chunk(vec0_vtab *p, sqlite3_value ** partitionKeyValues, i64 *chunk_rowid) {
  3949. int rc;
  3950. char *zSql;
  3951. sqlite3_stmt *stmt;
  3952. i64 rowid;
  3953. // Step 1: Insert a new row in _chunks, capture that new rowid
  3954. if(p->numPartitionColumns > 0) {
  3955. sqlite3_str * s = sqlite3_str_new(NULL);
  3956. sqlite3_str_appendf(s, "INSERT INTO " VEC0_SHADOW_CHUNKS_NAME, p->schemaName, p->tableName);
  3957. sqlite3_str_appendall(s, "(size, validity, rowids");
  3958. for(int i = 0; i < p->numPartitionColumns; i++) {
  3959. sqlite3_str_appendf(s, ", partition%02d", i);
  3960. }
  3961. sqlite3_str_appendall(s, ") VALUES (?, ?, ?");
  3962. for(int i = 0; i < p->numPartitionColumns; i++) {
  3963. sqlite3_str_appendall(s, ", ?");
  3964. }
  3965. sqlite3_str_appendall(s, ")");
  3966. zSql = sqlite3_str_finish(s);
  3967. }else {
  3968. zSql = sqlite3_mprintf("INSERT INTO " VEC0_SHADOW_CHUNKS_NAME
  3969. "(size, validity, rowids) "
  3970. "VALUES (?, ?, ?);",
  3971. p->schemaName, p->tableName);
  3972. }
  3973. if (!zSql) {
  3974. return SQLITE_NOMEM;
  3975. }
  3976. rc = sqlite3_prepare_v2(p->db, zSql, -1, &stmt, NULL);
  3977. sqlite3_free(zSql);
  3978. if (rc != SQLITE_OK) {
  3979. sqlite3_finalize(stmt);
  3980. return rc;
  3981. }
  3982. #if SQLITE_THREADSAFE
  3983. if (sqlite3_mutex_enter) {
  3984. sqlite3_mutex_enter(sqlite3_db_mutex(p->db));
  3985. }
  3986. #endif
  3987. sqlite3_bind_int64(stmt, 1, p->chunk_size); // size
  3988. sqlite3_bind_zeroblob(stmt, 2, p->chunk_size / CHAR_BIT); // validity bitmap
  3989. sqlite3_bind_zeroblob(stmt, 3, p->chunk_size * sizeof(i64)); // rowids
  3990. for(int i = 0; i < p->numPartitionColumns; i++) {
  3991. sqlite3_bind_value(stmt, 4 + i, partitionKeyValues[i]);
  3992. }
  3993. rc = sqlite3_step(stmt);
  3994. int failed = rc != SQLITE_DONE;
  3995. rowid = sqlite3_last_insert_rowid(p->db);
  3996. #if SQLITE_THREADSAFE
  3997. if (sqlite3_mutex_leave) {
  3998. sqlite3_mutex_leave(sqlite3_db_mutex(p->db));
  3999. }
  4000. #endif
  4001. sqlite3_finalize(stmt);
  4002. if (failed) {
  4003. return SQLITE_ERROR;
  4004. }
  4005. // Step 2: Create new vector chunks for each vector column, with
  4006. // that new chunk_rowid.
  4007. for (int i = 0; i < vec0_num_defined_user_columns(p); i++) {
  4008. if(p->user_column_kinds[i] != SQLITE_VEC0_USER_COLUMN_KIND_VECTOR) {
  4009. continue;
  4010. }
  4011. int vector_column_idx = p->user_column_idxs[i];
  4012. i64 vectorsSize =
  4013. p->chunk_size * vector_column_byte_size(p->vector_columns[vector_column_idx]);
  4014. zSql = sqlite3_mprintf("INSERT INTO " VEC0_SHADOW_VECTOR_N_NAME
  4015. "(rowid, vectors)"
  4016. "VALUES (?, ?)",
  4017. p->schemaName, p->tableName, vector_column_idx);
  4018. if (!zSql) {
  4019. return SQLITE_NOMEM;
  4020. }
  4021. rc = sqlite3_prepare_v2(p->db, zSql, -1, &stmt, NULL);
  4022. sqlite3_free(zSql);
  4023. if (rc != SQLITE_OK) {
  4024. sqlite3_finalize(stmt);
  4025. return rc;
  4026. }
  4027. sqlite3_bind_int64(stmt, 1, rowid);
  4028. sqlite3_bind_zeroblob64(stmt, 2, vectorsSize);
  4029. rc = sqlite3_step(stmt);
  4030. sqlite3_finalize(stmt);
  4031. if (rc != SQLITE_DONE) {
  4032. return rc;
  4033. }
  4034. }
  4035. // Step 3: Create new metadata chunks for each metadata column
  4036. for (int i = 0; i < vec0_num_defined_user_columns(p); i++) {
  4037. if(p->user_column_kinds[i] != SQLITE_VEC0_USER_COLUMN_KIND_METADATA) {
  4038. continue;
  4039. }
  4040. int metadata_column_idx = p->user_column_idxs[i];
  4041. zSql = sqlite3_mprintf("INSERT INTO " VEC0_SHADOW_METADATA_N_NAME
  4042. "(rowid, data)"
  4043. "VALUES (?, ?)",
  4044. p->schemaName, p->tableName, metadata_column_idx);
  4045. if (!zSql) {
  4046. return SQLITE_NOMEM;
  4047. }
  4048. rc = sqlite3_prepare_v2(p->db, zSql, -1, &stmt, NULL);
  4049. sqlite3_free(zSql);
  4050. if (rc != SQLITE_OK) {
  4051. sqlite3_finalize(stmt);
  4052. return rc;
  4053. }
  4054. sqlite3_bind_int64(stmt, 1, rowid);
  4055. sqlite3_bind_zeroblob64(stmt, 2, vec0_metadata_chunk_size(p->metadata_columns[metadata_column_idx].kind, p->chunk_size));
  4056. rc = sqlite3_step(stmt);
  4057. sqlite3_finalize(stmt);
  4058. if (rc != SQLITE_DONE) {
  4059. return rc;
  4060. }
  4061. }
  4062. if (chunk_rowid) {
  4063. *chunk_rowid = rowid;
  4064. }
  4065. return SQLITE_OK;
  4066. }
  4067. struct vec0_query_fullscan_data {
  4068. sqlite3_stmt *rowids_stmt;
  4069. i8 done;
  4070. };
  4071. void vec0_query_fullscan_data_clear(
  4072. struct vec0_query_fullscan_data *fullscan_data) {
  4073. if (!fullscan_data)
  4074. return;
  4075. if (fullscan_data->rowids_stmt) {
  4076. sqlite3_finalize(fullscan_data->rowids_stmt);
  4077. fullscan_data->rowids_stmt = NULL;
  4078. }
  4079. }
  4080. struct vec0_query_knn_data {
  4081. i64 k;
  4082. i64 k_used;
  4083. // Array of rowids of size k. Must be freed with sqlite3_free().
  4084. i64 *rowids;
  4085. // Array of distances of size k. Must be freed with sqlite3_free().
  4086. f32 *distances;
  4087. i64 current_idx;
  4088. };
  4089. void vec0_query_knn_data_clear(struct vec0_query_knn_data *knn_data) {
  4090. if (!knn_data)
  4091. return;
  4092. if (knn_data->rowids) {
  4093. sqlite3_free(knn_data->rowids);
  4094. knn_data->rowids = NULL;
  4095. }
  4096. if (knn_data->distances) {
  4097. sqlite3_free(knn_data->distances);
  4098. knn_data->distances = NULL;
  4099. }
  4100. }
  4101. struct vec0_query_point_data {
  4102. i64 rowid;
  4103. void *vectors[VEC0_MAX_VECTOR_COLUMNS];
  4104. int done;
  4105. };
  4106. void vec0_query_point_data_clear(struct vec0_query_point_data *point_data) {
  4107. if (!point_data)
  4108. return;
  4109. for (int i = 0; i < VEC0_MAX_VECTOR_COLUMNS; i++) {
  4110. sqlite3_free(point_data->vectors[i]);
  4111. point_data->vectors[i] = NULL;
  4112. }
  4113. }
  /**
   * Identifies the query plan a cursor is executing. Each plan is encoded as
   * a character constant.
   */
  typedef enum {
    // If any values are updated, please update the ARCHITECTURE.md docs accordingly!

    // Scan every row.
    VEC0_QUERY_PLAN_FULLSCAN = '1',

    // Look up a single row.
    VEC0_QUERY_PLAN_POINT = '2',

    // K-nearest-neighbours search.
    VEC0_QUERY_PLAN_KNN = '3',
  } vec0_query_plan;
  4120. typedef struct vec0_cursor vec0_cursor;
  4121. struct vec0_cursor {
  4122. sqlite3_vtab_cursor base;
  4123. vec0_query_plan query_plan;
  4124. struct vec0_query_fullscan_data *fullscan_data;
  4125. struct vec0_query_knn_data *knn_data;
  4126. struct vec0_query_point_data *point_data;
  4127. };
  4128. void vec0_cursor_clear(vec0_cursor *pCur) {
  4129. if (pCur->fullscan_data) {
  4130. vec0_query_fullscan_data_clear(pCur->fullscan_data);
  4131. sqlite3_free(pCur->fullscan_data);
  4132. pCur->fullscan_data = NULL;
  4133. }
  4134. if (pCur->knn_data) {
  4135. vec0_query_knn_data_clear(pCur->knn_data);
  4136. sqlite3_free(pCur->knn_data);
  4137. pCur->knn_data = NULL;
  4138. }
  4139. if (pCur->point_data) {
  4140. vec0_query_point_data_clear(pCur->point_data);
  4141. sqlite3_free(pCur->point_data);
  4142. pCur->point_data = NULL;
  4143. }
  4144. }
  // Prefix shared by the error messages vec0_init() reports through pzErr.
  #define VEC_CONSTRUCTOR_ERROR "vec0 constructor error: "
  4146. static int vec0_init(sqlite3 *db, void *pAux, int argc, const char *const *argv,
  4147. sqlite3_vtab **ppVtab, char **pzErr, bool isCreate) {
  4148. UNUSED_PARAMETER(pAux);
  4149. vec0_vtab *pNew;
  4150. int rc;
  4151. const char *zSql;
  4152. pNew = sqlite3_malloc(sizeof(*pNew));
  4153. if (pNew == 0)
  4154. return SQLITE_NOMEM;
  4155. memset(pNew, 0, sizeof(*pNew));
  4156. // Declared chunk_size=N for entire table.
  4157. // -1 to use the defualt, otherwise will get re-assigned on `chunk_size=N`
  4158. // option
  4159. int chunk_size = -1;
  4160. int numVectorColumns = 0;
  4161. int numPartitionColumns = 0;
  4162. int numAuxiliaryColumns = 0;
  4163. int numMetadataColumns = 0;
  4164. int user_column_idx = 0;
  4165. // track if a "primary key" column is defined
  4166. char *pkColumnName = NULL;
  4167. int pkColumnNameLength;
  4168. int pkColumnType = SQLITE_INTEGER;
  4169. for (int i = 3; i < argc; i++) {
  4170. struct VectorColumnDefinition vecColumn;
  4171. struct Vec0PartitionColumnDefinition partitionColumn;
  4172. struct Vec0AuxiliaryColumnDefinition auxColumn;
  4173. struct Vec0MetadataColumnDefinition metadataColumn;
  4174. char *cName = NULL;
  4175. int cNameLength;
  4176. int cType;
  4177. // Scenario #1: Constructor argument is a vector column definition, ie `foo float[1024]`
  4178. rc = vec0_parse_vector_column(argv[i], strlen(argv[i]), &vecColumn);
  4179. if (rc == SQLITE_ERROR) {
  4180. *pzErr = sqlite3_mprintf(
  4181. VEC_CONSTRUCTOR_ERROR "could not parse vector column '%s'", argv[i]);
  4182. goto error;
  4183. }
  4184. if (rc == SQLITE_OK) {
  4185. if (numVectorColumns >= VEC0_MAX_VECTOR_COLUMNS) {
  4186. sqlite3_free(vecColumn.name);
  4187. *pzErr = sqlite3_mprintf(VEC_CONSTRUCTOR_ERROR
  4188. "Too many provided vector columns, maximum %d",
  4189. VEC0_MAX_VECTOR_COLUMNS);
  4190. goto error;
  4191. }
  4192. if (vecColumn.dimensions > SQLITE_VEC_VEC0_MAX_DIMENSIONS) {
  4193. sqlite3_free(vecColumn.name);
  4194. *pzErr = sqlite3_mprintf(
  4195. VEC_CONSTRUCTOR_ERROR
  4196. "Dimension on vector column too large, provided %lld, maximum %lld",
  4197. (i64)vecColumn.dimensions, SQLITE_VEC_VEC0_MAX_DIMENSIONS);
  4198. goto error;
  4199. }
  4200. pNew->user_column_kinds[user_column_idx] = SQLITE_VEC0_USER_COLUMN_KIND_VECTOR;
  4201. pNew->user_column_idxs[user_column_idx] = numVectorColumns;
  4202. memcpy(&pNew->vector_columns[numVectorColumns], &vecColumn, sizeof(vecColumn));
  4203. numVectorColumns++;
  4204. user_column_idx++;
  4205. continue;
  4206. }
  4207. // Scenario #2: Constructor argument is a partition key column definition, ie `user_id text partition key`
  4208. rc = vec0_parse_partition_key_definition(argv[i], strlen(argv[i]), &cName,
  4209. &cNameLength, &cType);
  4210. if (rc == SQLITE_OK) {
  4211. if (numPartitionColumns >= VEC0_MAX_PARTITION_COLUMNS) {
  4212. *pzErr = sqlite3_mprintf(
  4213. VEC_CONSTRUCTOR_ERROR
  4214. "More than %d partition key columns were provided",
  4215. VEC0_MAX_PARTITION_COLUMNS);
  4216. goto error;
  4217. }
  4218. partitionColumn.type = cType;
  4219. partitionColumn.name_length = cNameLength;
  4220. partitionColumn.name = sqlite3_mprintf("%.*s", cNameLength, cName);
  4221. if(!partitionColumn.name) {
  4222. rc = SQLITE_NOMEM;
  4223. goto error;
  4224. }
  4225. pNew->user_column_kinds[user_column_idx] = SQLITE_VEC0_USER_COLUMN_KIND_PARTITION;
  4226. pNew->user_column_idxs[user_column_idx] = numPartitionColumns;
  4227. memcpy(&pNew->paritition_columns[numPartitionColumns], &partitionColumn, sizeof(partitionColumn));
  4228. numPartitionColumns++;
  4229. user_column_idx++;
  4230. continue;
  4231. }
  4232. // Scenario #3: Constructor argument is a primary key column definition, ie `article_id text primary key`
  4233. rc = vec0_parse_primary_key_definition(argv[i], strlen(argv[i]), &cName,
  4234. &cNameLength, &cType);
  4235. if (rc == SQLITE_OK) {
  4236. if (pkColumnName) {
  4237. *pzErr = sqlite3_mprintf(
  4238. VEC_CONSTRUCTOR_ERROR
  4239. "More than one primary key definition was provided, vec0 only "
  4240. "suports a single primary key column",
  4241. argv[i]);
  4242. goto error;
  4243. }
  4244. pkColumnName = cName;
  4245. pkColumnNameLength = cNameLength;
  4246. pkColumnType = cType;
  4247. continue;
  4248. }
  4249. // Scenario #4: Constructor argument is a auxiliary column definition, ie `+contents text`
  4250. rc = vec0_parse_auxiliary_column_definition(argv[i], strlen(argv[i]), &cName,
  4251. &cNameLength, &cType);
  4252. if(rc == SQLITE_OK) {
  4253. if (numAuxiliaryColumns >= VEC0_MAX_AUXILIARY_COLUMNS) {
  4254. *pzErr = sqlite3_mprintf(
  4255. VEC_CONSTRUCTOR_ERROR
  4256. "More than %d auxiliary columns were provided",
  4257. VEC0_MAX_AUXILIARY_COLUMNS);
  4258. goto error;
  4259. }
  4260. auxColumn.type = cType;
  4261. auxColumn.name_length = cNameLength;
  4262. auxColumn.name = sqlite3_mprintf("%.*s", cNameLength, cName);
  4263. if(!auxColumn.name) {
  4264. rc = SQLITE_NOMEM;
  4265. goto error;
  4266. }
  4267. pNew->user_column_kinds[user_column_idx] = SQLITE_VEC0_USER_COLUMN_KIND_AUXILIARY;
  4268. pNew->user_column_idxs[user_column_idx] = numAuxiliaryColumns;
  4269. memcpy(&pNew->auxiliary_columns[numAuxiliaryColumns], &auxColumn, sizeof(auxColumn));
  4270. numAuxiliaryColumns++;
  4271. user_column_idx++;
  4272. continue;
  4273. }
  4274. vec0_metadata_column_kind kind;
  4275. rc = vec0_parse_metadata_column_definition(argv[i], strlen(argv[i]), &cName,
  4276. &cNameLength, &kind);
  4277. if(rc == SQLITE_OK) {
  4278. if (numMetadataColumns >= VEC0_MAX_METADATA_COLUMNS) {
  4279. *pzErr = sqlite3_mprintf(
  4280. VEC_CONSTRUCTOR_ERROR
  4281. "More than %d metadata columns were provided",
  4282. VEC0_MAX_METADATA_COLUMNS);
  4283. goto error;
  4284. }
  4285. metadataColumn.kind = kind;
  4286. metadataColumn.name_length = cNameLength;
  4287. metadataColumn.name = sqlite3_mprintf("%.*s", cNameLength, cName);
  4288. if(!metadataColumn.name) {
  4289. rc = SQLITE_NOMEM;
  4290. goto error;
  4291. }
  4292. pNew->user_column_kinds[user_column_idx] = SQLITE_VEC0_USER_COLUMN_KIND_METADATA;
  4293. pNew->user_column_idxs[user_column_idx] = numMetadataColumns;
  4294. memcpy(&pNew->metadata_columns[numMetadataColumns], &metadataColumn, sizeof(metadataColumn));
  4295. numMetadataColumns++;
  4296. user_column_idx++;
  4297. continue;
  4298. }
  4299. // Scenario #4: Constructor argument is a table-level option, ie `chunk_size`
  4300. char *key;
  4301. char *value;
  4302. int keyLength, valueLength;
  4303. rc = vec0_parse_table_option(argv[i], strlen(argv[i]), &key, &keyLength,
  4304. &value, &valueLength);
  4305. if (rc == SQLITE_ERROR) {
  4306. *pzErr = sqlite3_mprintf(
  4307. VEC_CONSTRUCTOR_ERROR "could not parse table option '%s'", argv[i]);
  4308. goto error;
  4309. }
  4310. if (rc == SQLITE_OK) {
  4311. if (sqlite3_strnicmp(key, "chunk_size", keyLength) == 0) {
  4312. chunk_size = atoi(value);
  4313. if (chunk_size <= 0) {
  4314. // IMP: V01931_18769
  4315. *pzErr =
  4316. sqlite3_mprintf(VEC_CONSTRUCTOR_ERROR
  4317. "chunk_size must be a non-zero positive integer");
  4318. goto error;
  4319. }
  4320. if ((chunk_size % 8) != 0) {
  4321. // IMP: V14110_30948
  4322. *pzErr = sqlite3_mprintf(VEC_CONSTRUCTOR_ERROR
  4323. "chunk_size must be divisible by 8");
  4324. goto error;
  4325. }
  4326. #define SQLITE_VEC_CHUNK_SIZE_MAX 4096
  4327. if (chunk_size > SQLITE_VEC_CHUNK_SIZE_MAX) {
  4328. *pzErr =
  4329. sqlite3_mprintf(VEC_CONSTRUCTOR_ERROR "chunk_size too large");
  4330. goto error;
  4331. }
  4332. } else {
  4333. // IMP: V27642_11712
  4334. *pzErr = sqlite3_mprintf(
  4335. VEC_CONSTRUCTOR_ERROR "Unknown table option: %.*s", keyLength, key);
  4336. goto error;
  4337. }
  4338. continue;
  4339. }
  4340. // Scenario #5: Unknown constructor argument
  4341. *pzErr =
  4342. sqlite3_mprintf(VEC_CONSTRUCTOR_ERROR "Could not parse '%s'", argv[i]);
  4343. goto error;
  4344. }
  4345. if (chunk_size < 0) {
  4346. chunk_size = 1024;
  4347. }
  4348. if (numVectorColumns <= 0) {
  4349. *pzErr = sqlite3_mprintf(VEC_CONSTRUCTOR_ERROR
  4350. "At least one vector column is required");
  4351. goto error;
  4352. }
  4353. sqlite3_str *createStr = sqlite3_str_new(NULL);
  4354. sqlite3_str_appendall(createStr, "CREATE TABLE x(");
  4355. if (pkColumnName) {
  4356. sqlite3_str_appendf(createStr, "\"%.*w\" primary key, ", pkColumnNameLength,
  4357. pkColumnName);
  4358. } else {
  4359. sqlite3_str_appendall(createStr, "rowid, ");
  4360. }
  4361. for (int i = 0; i < numVectorColumns + numPartitionColumns + numAuxiliaryColumns + numMetadataColumns; i++) {
  4362. switch(pNew->user_column_kinds[i]) {
  4363. case SQLITE_VEC0_USER_COLUMN_KIND_VECTOR: {
  4364. int vector_idx = pNew->user_column_idxs[i];
  4365. sqlite3_str_appendf(createStr, "\"%.*w\", ",
  4366. pNew->vector_columns[vector_idx].name_length,
  4367. pNew->vector_columns[vector_idx].name);
  4368. break;
  4369. }
  4370. case SQLITE_VEC0_USER_COLUMN_KIND_PARTITION: {
  4371. int partition_idx = pNew->user_column_idxs[i];
  4372. sqlite3_str_appendf(createStr, "\"%.*w\", ",
  4373. pNew->paritition_columns[partition_idx].name_length,
  4374. pNew->paritition_columns[partition_idx].name);
  4375. break;
  4376. }
  4377. case SQLITE_VEC0_USER_COLUMN_KIND_AUXILIARY: {
  4378. int auxiliary_idx = pNew->user_column_idxs[i];
  4379. sqlite3_str_appendf(createStr, "\"%.*w\", ",
  4380. pNew->auxiliary_columns[auxiliary_idx].name_length,
  4381. pNew->auxiliary_columns[auxiliary_idx].name);
  4382. break;
  4383. }
  4384. case SQLITE_VEC0_USER_COLUMN_KIND_METADATA: {
  4385. int metadata_idx = pNew->user_column_idxs[i];
  4386. sqlite3_str_appendf(createStr, "\"%.*w\", ",
  4387. pNew->metadata_columns[metadata_idx].name_length,
  4388. pNew->metadata_columns[metadata_idx].name);
  4389. break;
  4390. }
  4391. }
  4392. }
  4393. sqlite3_str_appendall(createStr, " distance hidden, k hidden) ");
  4394. if (pkColumnName) {
  4395. sqlite3_str_appendall(createStr, "without rowid ");
  4396. }
  4397. zSql = sqlite3_str_finish(createStr);
  4398. if (!zSql) {
  4399. goto error;
  4400. }
  4401. rc = sqlite3_declare_vtab(db, zSql);
  4402. sqlite3_free((void *)zSql);
  4403. if (rc != SQLITE_OK) {
  4404. *pzErr = sqlite3_mprintf(VEC_CONSTRUCTOR_ERROR
  4405. "could not declare virtual table, '%s'",
  4406. sqlite3_errmsg(db));
  4407. goto error;
  4408. }
  4409. const char *schemaName = argv[1];
  4410. const char *tableName = argv[2];
  4411. pNew->db = db;
  4412. pNew->pkIsText = pkColumnType == SQLITE_TEXT;
  4413. pNew->schemaName = sqlite3_mprintf("%s", schemaName);
  4414. if (!pNew->schemaName) {
  4415. goto error;
  4416. }
  4417. pNew->tableName = sqlite3_mprintf("%s", tableName);
  4418. if (!pNew->tableName) {
  4419. goto error;
  4420. }
  4421. pNew->shadowRowidsName = sqlite3_mprintf("%s_rowids", tableName);
  4422. if (!pNew->shadowRowidsName) {
  4423. goto error;
  4424. }
  4425. pNew->shadowChunksName = sqlite3_mprintf("%s_chunks", tableName);
  4426. if (!pNew->shadowChunksName) {
  4427. goto error;
  4428. }
  4429. pNew->numVectorColumns = numVectorColumns;
  4430. pNew->numPartitionColumns = numPartitionColumns;
  4431. pNew->numAuxiliaryColumns = numAuxiliaryColumns;
  4432. pNew->numMetadataColumns = numMetadataColumns;
  4433. for (int i = 0; i < pNew->numVectorColumns; i++) {
  4434. pNew->shadowVectorChunksNames[i] =
  4435. sqlite3_mprintf("%s_vector_chunks%02d", tableName, i);
  4436. if (!pNew->shadowVectorChunksNames[i]) {
  4437. goto error;
  4438. }
  4439. }
  4440. for (int i = 0; i < pNew->numMetadataColumns; i++) {
  4441. pNew->shadowMetadataChunksNames[i] =
  4442. sqlite3_mprintf("%s_metadatachunks%02d", tableName, i);
  4443. if (!pNew->shadowMetadataChunksNames[i]) {
  4444. goto error;
  4445. }
  4446. }
  4447. pNew->chunk_size = chunk_size;
  4448. // if xCreate, then create the necessary shadow tables
  4449. if (isCreate) {
  4450. sqlite3_stmt *stmt;
  4451. int rc;
  4452. char * zCreateInfo = sqlite3_mprintf("CREATE TABLE "VEC0_SHADOW_INFO_NAME " (key text primary key, value any)", pNew->schemaName, pNew->tableName);
  4453. if(!zCreateInfo) {
  4454. goto error;
  4455. }
  4456. rc = sqlite3_prepare_v2(db, zCreateInfo, -1, &stmt, NULL);
  4457. sqlite3_free((void *) zCreateInfo);
  4458. if ((rc != SQLITE_OK) || (sqlite3_step(stmt) != SQLITE_DONE)) {
  4459. // TODO(IMP)
  4460. sqlite3_finalize(stmt);
  4461. *pzErr = sqlite3_mprintf("Could not create '_info' shadow table: %s",
  4462. sqlite3_errmsg(db));
  4463. goto error;
  4464. }
  4465. sqlite3_finalize(stmt);
  4466. char * zSeedInfo = sqlite3_mprintf(
  4467. "INSERT INTO "VEC0_SHADOW_INFO_NAME "(key, value) VALUES "
  4468. "(?1, ?2), (?3, ?4), (?5, ?6), (?7, ?8) ",
  4469. pNew->schemaName, pNew->tableName
  4470. );
  4471. if(!zSeedInfo) {
  4472. goto error;
  4473. }
  4474. rc = sqlite3_prepare_v2(db, zSeedInfo, -1, &stmt, NULL);
  4475. sqlite3_free((void *) zSeedInfo);
  4476. if (rc != SQLITE_OK) {
  4477. // TODO(IMP)
  4478. sqlite3_finalize(stmt);
  4479. *pzErr = sqlite3_mprintf("Could not seed '_info' shadow table: %s",
  4480. sqlite3_errmsg(db));
  4481. goto error;
  4482. }
  4483. sqlite3_bind_text(stmt, 1, "CREATE_VERSION", -1, SQLITE_STATIC);
  4484. sqlite3_bind_text(stmt, 2, SQLITE_VEC_VERSION, -1, SQLITE_STATIC);
  4485. sqlite3_bind_text(stmt, 3, "CREATE_VERSION_MAJOR", -1, SQLITE_STATIC);
  4486. sqlite3_bind_int(stmt, 4, SQLITE_VEC_VERSION_MAJOR);
  4487. sqlite3_bind_text(stmt, 5, "CREATE_VERSION_MINOR", -1, SQLITE_STATIC);
  4488. sqlite3_bind_int(stmt, 6, SQLITE_VEC_VERSION_MINOR);
  4489. sqlite3_bind_text(stmt, 7, "CREATE_VERSION_PATCH", -1, SQLITE_STATIC);
  4490. sqlite3_bind_int(stmt, 8, SQLITE_VEC_VERSION_PATCH);
  4491. if(sqlite3_step(stmt) != SQLITE_DONE) {
  4492. // TODO(IMP)
  4493. sqlite3_finalize(stmt);
  4494. *pzErr = sqlite3_mprintf("Could not seed '_info' shadow table: %s",
  4495. sqlite3_errmsg(db));
  4496. goto error;
  4497. }
  4498. sqlite3_finalize(stmt);
  4499. // create the _chunks shadow table
  4500. char *zCreateShadowChunks = NULL;
  4501. if(pNew->numPartitionColumns) {
  4502. sqlite3_str * s = sqlite3_str_new(NULL);
  4503. sqlite3_str_appendf(s, "CREATE TABLE " VEC0_SHADOW_CHUNKS_NAME "(", pNew->schemaName, pNew->tableName);
  4504. sqlite3_str_appendall(s, "chunk_id INTEGER PRIMARY KEY AUTOINCREMENT," "size INTEGER NOT NULL,");
  4505. sqlite3_str_appendall(s, "sequence_id integer,");
  4506. for(int i = 0; i < pNew->numPartitionColumns;i++) {
  4507. sqlite3_str_appendf(s, "partition%02d,", i);
  4508. }
  4509. sqlite3_str_appendall(s, "validity BLOB NOT NULL, rowids BLOB NOT NULL);");
  4510. zCreateShadowChunks = sqlite3_str_finish(s);
  4511. }else {
  4512. zCreateShadowChunks = sqlite3_mprintf(VEC0_SHADOW_CHUNKS_CREATE,
  4513. pNew->schemaName, pNew->tableName);
  4514. }
  4515. if (!zCreateShadowChunks) {
  4516. goto error;
  4517. }
  4518. rc = sqlite3_prepare_v2(db, zCreateShadowChunks, -1, &stmt, 0);
  4519. sqlite3_free((void *)zCreateShadowChunks);
  4520. if ((rc != SQLITE_OK) || (sqlite3_step(stmt) != SQLITE_DONE)) {
  4521. // IMP: V17740_01811
  4522. sqlite3_finalize(stmt);
  4523. *pzErr = sqlite3_mprintf("Could not create '_chunks' shadow table: %s",
  4524. sqlite3_errmsg(db));
  4525. goto error;
  4526. }
  4527. sqlite3_finalize(stmt);
  4528. // create the _rowids shadow table
  4529. char *zCreateShadowRowids;
  4530. if (pNew->pkIsText) {
  4531. // adds a "text unique not null" constraint to the id column
  4532. zCreateShadowRowids = sqlite3_mprintf(VEC0_SHADOW_ROWIDS_CREATE_PK_TEXT,
  4533. pNew->schemaName, pNew->tableName);
  4534. } else {
  4535. zCreateShadowRowids = sqlite3_mprintf(VEC0_SHADOW_ROWIDS_CREATE_BASIC,
  4536. pNew->schemaName, pNew->tableName);
  4537. }
  4538. if (!zCreateShadowRowids) {
  4539. goto error;
  4540. }
  4541. rc = sqlite3_prepare_v2(db, zCreateShadowRowids, -1, &stmt, 0);
  4542. sqlite3_free((void *)zCreateShadowRowids);
  4543. if ((rc != SQLITE_OK) || (sqlite3_step(stmt) != SQLITE_DONE)) {
  4544. // IMP: V11631_28470
  4545. sqlite3_finalize(stmt);
  4546. *pzErr = sqlite3_mprintf("Could not create '_rowids' shadow table: %s",
  4547. sqlite3_errmsg(db));
  4548. goto error;
  4549. }
  4550. sqlite3_finalize(stmt);
  4551. for (int i = 0; i < pNew->numVectorColumns; i++) {
  4552. char *zSql = sqlite3_mprintf(VEC0_SHADOW_VECTOR_N_CREATE,
  4553. pNew->schemaName, pNew->tableName, i);
  4554. if (!zSql) {
  4555. goto error;
  4556. }
  4557. rc = sqlite3_prepare_v2(db, zSql, -1, &stmt, 0);
  4558. sqlite3_free((void *)zSql);
  4559. if ((rc != SQLITE_OK) || (sqlite3_step(stmt) != SQLITE_DONE)) {
  4560. // IMP: V25919_09989
  4561. sqlite3_finalize(stmt);
  4562. *pzErr = sqlite3_mprintf(
  4563. "Could not create '_vector_chunks%02d' shadow table: %s", i,
  4564. sqlite3_errmsg(db));
  4565. goto error;
  4566. }
  4567. sqlite3_finalize(stmt);
  4568. }
  4569. for (int i = 0; i < pNew->numMetadataColumns; i++) {
  4570. char *zSql = sqlite3_mprintf("CREATE TABLE " VEC0_SHADOW_METADATA_N_NAME "(rowid PRIMARY KEY, data BLOB NOT NULL);",
  4571. pNew->schemaName, pNew->tableName, i);
  4572. if (!zSql) {
  4573. goto error;
  4574. }
  4575. rc = sqlite3_prepare_v2(db, zSql, -1, &stmt, 0);
  4576. sqlite3_free((void *)zSql);
  4577. if ((rc != SQLITE_OK) || (sqlite3_step(stmt) != SQLITE_DONE)) {
  4578. sqlite3_finalize(stmt);
  4579. *pzErr = sqlite3_mprintf(
  4580. "Could not create '_metata_chunks%02d' shadow table: %s", i,
  4581. sqlite3_errmsg(db));
  4582. goto error;
  4583. }
  4584. sqlite3_finalize(stmt);
  4585. if(pNew->metadata_columns[i].kind == VEC0_METADATA_COLUMN_KIND_TEXT) {
  4586. char *zSql = sqlite3_mprintf("CREATE TABLE " VEC0_SHADOW_METADATA_TEXT_DATA_NAME "(rowid PRIMARY KEY, data TEXT);",
  4587. pNew->schemaName, pNew->tableName, i);
  4588. if (!zSql) {
  4589. goto error;
  4590. }
  4591. rc = sqlite3_prepare_v2(db, zSql, -1, &stmt, 0);
  4592. sqlite3_free((void *)zSql);
  4593. if ((rc != SQLITE_OK) || (sqlite3_step(stmt) != SQLITE_DONE)) {
  4594. sqlite3_finalize(stmt);
  4595. *pzErr = sqlite3_mprintf(
  4596. "Could not create '_metadatatext%02d' shadow table: %s", i,
  4597. sqlite3_errmsg(db));
  4598. goto error;
  4599. }
  4600. sqlite3_finalize(stmt);
  4601. }
  4602. }
  4603. if(pNew->numAuxiliaryColumns > 0) {
  4604. sqlite3_stmt * stmt;
  4605. sqlite3_str * s = sqlite3_str_new(NULL);
  4606. sqlite3_str_appendf(s, "CREATE TABLE " VEC0_SHADOW_AUXILIARY_NAME "( rowid integer PRIMARY KEY ", pNew->schemaName, pNew->tableName);
  4607. for(int i = 0; i < pNew->numAuxiliaryColumns; i++) {
  4608. sqlite3_str_appendf(s, ", value%02d", i);
  4609. }
  4610. sqlite3_str_appendall(s, ")");
  4611. char *zSql = sqlite3_str_finish(s);
  4612. if(!zSql) {
  4613. goto error;
  4614. }
  4615. rc = sqlite3_prepare_v2(db, zSql, -1, &stmt, NULL);
  4616. if ((rc != SQLITE_OK) || (sqlite3_step(stmt) != SQLITE_DONE)) {
  4617. sqlite3_finalize(stmt);
  4618. *pzErr = sqlite3_mprintf(
  4619. "Could not create auxiliary shadow table: %s",
  4620. sqlite3_errmsg(db));
  4621. goto error;
  4622. }
  4623. sqlite3_finalize(stmt);
  4624. }
  4625. }
  4626. *ppVtab = (sqlite3_vtab *)pNew;
  4627. return SQLITE_OK;
  4628. error:
  4629. vec0_free(pNew);
  4630. return SQLITE_ERROR;
  4631. }
  4632. static int vec0Create(sqlite3 *db, void *pAux, int argc,
  4633. const char *const *argv, sqlite3_vtab **ppVtab,
  4634. char **pzErr) {
  4635. return vec0_init(db, pAux, argc, argv, ppVtab, pzErr, true);
  4636. }
  4637. static int vec0Connect(sqlite3 *db, void *pAux, int argc,
  4638. const char *const *argv, sqlite3_vtab **ppVtab,
  4639. char **pzErr) {
  4640. return vec0_init(db, pAux, argc, argv, ppVtab, pzErr, false);
  4641. }
  4642. static int vec0Disconnect(sqlite3_vtab *pVtab) {
  4643. vec0_vtab *p = (vec0_vtab *)pVtab;
  4644. vec0_free(p);
  4645. sqlite3_free(p);
  4646. return SQLITE_OK;
  4647. }
  4648. static int vec0Destroy(sqlite3_vtab *pVtab) {
  4649. vec0_vtab *p = (vec0_vtab *)pVtab;
  4650. sqlite3_stmt *stmt;
  4651. int rc;
  4652. const char *zSql;
  4653. // Free up any sqlite3_stmt, otherwise DROPs on those tables will fail
  4654. vec0_free_resources(p);
  4655. // TODO(test) later: can't evidence-of here, bc always gives "SQL logic error" instead of
  4656. // provided error
  4657. zSql = sqlite3_mprintf("DROP TABLE " VEC0_SHADOW_CHUNKS_NAME, p->schemaName,
  4658. p->tableName);
  4659. rc = sqlite3_prepare_v2(p->db, zSql, -1, &stmt, 0);
  4660. sqlite3_free((void *)zSql);
  4661. if ((rc != SQLITE_OK) || (sqlite3_step(stmt) != SQLITE_DONE)) {
  4662. rc = SQLITE_ERROR;
  4663. vtab_set_error(pVtab, "could not drop chunks shadow table");
  4664. goto done;
  4665. }
  4666. sqlite3_finalize(stmt);
  4667. zSql = sqlite3_mprintf("DROP TABLE " VEC0_SHADOW_INFO_NAME, p->schemaName,
  4668. p->tableName);
  4669. rc = sqlite3_prepare_v2(p->db, zSql, -1, &stmt, 0);
  4670. sqlite3_free((void *)zSql);
  4671. if ((rc != SQLITE_OK) || (sqlite3_step(stmt) != SQLITE_DONE)) {
  4672. rc = SQLITE_ERROR;
  4673. vtab_set_error(pVtab, "could not drop info shadow table");
  4674. goto done;
  4675. }
  4676. sqlite3_finalize(stmt);
  4677. zSql = sqlite3_mprintf("DROP TABLE " VEC0_SHADOW_ROWIDS_NAME, p->schemaName,
  4678. p->tableName);
  4679. rc = sqlite3_prepare_v2(p->db, zSql, -1, &stmt, 0);
  4680. sqlite3_free((void *)zSql);
  4681. if ((rc != SQLITE_OK) || (sqlite3_step(stmt) != SQLITE_DONE)) {
  4682. rc = SQLITE_ERROR;
  4683. goto done;
  4684. }
  4685. sqlite3_finalize(stmt);
  4686. for (int i = 0; i < p->numVectorColumns; i++) {
  4687. zSql = sqlite3_mprintf("DROP TABLE \"%w\".\"%w\"", p->schemaName,
  4688. p->shadowVectorChunksNames[i]);
  4689. rc = sqlite3_prepare_v2(p->db, zSql, -1, &stmt, 0);
  4690. sqlite3_free((void *)zSql);
  4691. if ((rc != SQLITE_OK) || (sqlite3_step(stmt) != SQLITE_DONE)) {
  4692. rc = SQLITE_ERROR;
  4693. goto done;
  4694. }
  4695. sqlite3_finalize(stmt);
  4696. }
  4697. if(p->numAuxiliaryColumns > 0) {
  4698. zSql = sqlite3_mprintf("DROP TABLE " VEC0_SHADOW_AUXILIARY_NAME, p->schemaName, p->tableName);
  4699. rc = sqlite3_prepare_v2(p->db, zSql, -1, &stmt, 0);
  4700. sqlite3_free((void *)zSql);
  4701. if ((rc != SQLITE_OK) || (sqlite3_step(stmt) != SQLITE_DONE)) {
  4702. rc = SQLITE_ERROR;
  4703. goto done;
  4704. }
  4705. sqlite3_finalize(stmt);
  4706. }
  4707. for (int i = 0; i < p->numMetadataColumns; i++) {
  4708. zSql = sqlite3_mprintf("DROP TABLE " VEC0_SHADOW_METADATA_N_NAME, p->schemaName,p->tableName, i);
  4709. rc = sqlite3_prepare_v2(p->db, zSql, -1, &stmt, 0);
  4710. sqlite3_free((void *)zSql);
  4711. if ((rc != SQLITE_OK) || (sqlite3_step(stmt) != SQLITE_DONE)) {
  4712. rc = SQLITE_ERROR;
  4713. goto done;
  4714. }
  4715. sqlite3_finalize(stmt);
  4716. if(p->metadata_columns[i].kind == VEC0_METADATA_COLUMN_KIND_TEXT) {
  4717. zSql = sqlite3_mprintf("DROP TABLE " VEC0_SHADOW_METADATA_TEXT_DATA_NAME, p->schemaName,p->tableName, i);
  4718. rc = sqlite3_prepare_v2(p->db, zSql, -1, &stmt, 0);
  4719. sqlite3_free((void *)zSql);
  4720. if ((rc != SQLITE_OK) || (sqlite3_step(stmt) != SQLITE_DONE)) {
  4721. rc = SQLITE_ERROR;
  4722. goto done;
  4723. }
  4724. sqlite3_finalize(stmt);
  4725. }
  4726. }
  4727. stmt = NULL;
  4728. rc = SQLITE_OK;
  4729. done:
  4730. sqlite3_finalize(stmt);
  4731. vec0_free(p);
  4732. // If there was an error
  4733. if (rc == SQLITE_OK) {
  4734. sqlite3_free(p);
  4735. }
  4736. return rc;
  4737. }
  4738. static int vec0Open(sqlite3_vtab *p, sqlite3_vtab_cursor **ppCursor) {
  4739. UNUSED_PARAMETER(p);
  4740. vec0_cursor *pCur;
  4741. pCur = sqlite3_malloc(sizeof(*pCur));
  4742. if (pCur == 0)
  4743. return SQLITE_NOMEM;
  4744. memset(pCur, 0, sizeof(*pCur));
  4745. *ppCursor = &pCur->base;
  4746. return SQLITE_OK;
  4747. }
  4748. static int vec0Close(sqlite3_vtab_cursor *cur) {
  4749. vec0_cursor *pCur = (vec0_cursor *)cur;
  4750. vec0_cursor_clear(pCur);
  4751. sqlite3_free(pCur);
  4752. return SQLITE_OK;
  4753. }
  /**
   * Tags for every kind of "value" that vec0Filter receives through argv/argc.
   * vec0BestIndex writes one tag into idxStr for each constraint it hands an
   * argvIndex to, so the tag records what that argument is for.
   *
   * If any values are updated, please update the ARCHITECTURE.md docs
   * accordingly!
   */
  typedef enum {
    /* KNN query: the MATCH constraint on a vector column. */
    VEC0_IDXSTR_KIND_KNN_MATCH = '{',

    /* KNN query: the LIMIT or `k = ?` constraint. */
    VEC0_IDXSTR_KIND_KNN_K = '}',

    /* KNN query: a `rowid in (...)` constraint. */
    VEC0_IDXSTR_KIND_KNN_ROWID_IN = '[',

    /* KNN query: a constraint on a partition key column. */
    VEC0_IDXSTR_KIND_KNN_PARTITON_CONSTRAINT = ']',

    /* Point query: the equality constraint on the rowid/id column. */
    VEC0_IDXSTR_KIND_POINT_ID = '!',

    /* KNN query: a constraint on a metadata column. */
    VEC0_IDXSTR_KIND_METADATA_CONSTRAINT = '&',
  } vec0_idxstr_kind;
  /**
   * The SQLITE_INDEX_CONSTRAINT_* operators that vec0 partition key columns
   * support, re-encoded as single characters so they fit nicely in idxstr.
   *
   * If any values are updated, please update the ARCHITECTURE.md docs
   * accordingly!
   */
  typedef enum {
    VEC0_PARTITION_OPERATOR_EQ = 'a', /* SQLITE_INDEX_CONSTRAINT_EQ */
    VEC0_PARTITION_OPERATOR_GT = 'b', /* SQLITE_INDEX_CONSTRAINT_GT */
    VEC0_PARTITION_OPERATOR_LE = 'c', /* SQLITE_INDEX_CONSTRAINT_LE */
    VEC0_PARTITION_OPERATOR_LT = 'd', /* SQLITE_INDEX_CONSTRAINT_LT */
    VEC0_PARTITION_OPERATOR_GE = 'e', /* SQLITE_INDEX_CONSTRAINT_GE */
    VEC0_PARTITION_OPERATOR_NE = 'f', /* SQLITE_INDEX_CONSTRAINT_NE */
  } vec0_partition_operator;
  /**
   * Operators accepted on vec0 metadata columns in a KNN query, encoded as
   * single characters that vec0BestIndex appends to idxStr after the metadata
   * constraint tag and column letter.
   */
  typedef enum {
    VEC0_METADATA_OPERATOR_EQ = 'a', /* SQLITE_INDEX_CONSTRAINT_EQ */
    VEC0_METADATA_OPERATOR_GT = 'b', /* SQLITE_INDEX_CONSTRAINT_GT */
    VEC0_METADATA_OPERATOR_LE = 'c', /* SQLITE_INDEX_CONSTRAINT_LE */
    VEC0_METADATA_OPERATOR_LT = 'd', /* SQLITE_INDEX_CONSTRAINT_LT */
    VEC0_METADATA_OPERATOR_GE = 'e', /* SQLITE_INDEX_CONSTRAINT_GE */
    VEC0_METADATA_OPERATOR_NE = 'f', /* SQLITE_INDEX_CONSTRAINT_NE */
    /* An EQ constraint that sqlite3_vtab_in() reports as `col in (...)`. */
    VEC0_METADATA_OPERATOR_IN = 'g',
  } vec0_metadata_operator;
  4785. static int vec0BestIndex(sqlite3_vtab *pVTab, sqlite3_index_info *pIdxInfo) {
  4786. vec0_vtab *p = (vec0_vtab *)pVTab;
  4787. /**
  4788. * Possible query plans are:
  4789. * 1. KNN when:
  4790. * a) An `MATCH` op on vector column
  4791. * b) ORDER BY on distance column
  4792. * c) LIMIT
  4793. * d) rowid in (...) OPTIONAL
  4794. * 2. Point when:
  4795. * a) An `EQ` op on rowid column
  4796. * 3. else: fullscan
  4797. *
  4798. */
  4799. int iMatchTerm = -1;
  4800. int iMatchVectorTerm = -1;
  4801. int iLimitTerm = -1;
  4802. int iRowidTerm = -1;
  4803. int iKTerm = -1;
  4804. int iRowidInTerm = -1;
  4805. int hasAuxConstraint = 0;
  4806. #ifdef SQLITE_VEC_DEBUG
  4807. printf("pIdxInfo->nOrderBy=%d, pIdxInfo->nConstraint=%d\n", pIdxInfo->nOrderBy, pIdxInfo->nConstraint);
  4808. #endif
  4809. for (int i = 0; i < pIdxInfo->nConstraint; i++) {
  4810. u8 vtabIn = 0;
  4811. #if COMPILER_SUPPORTS_VTAB_IN
  4812. if (sqlite3_libversion_number() >= 3038000) {
  4813. vtabIn = sqlite3_vtab_in(pIdxInfo, i, -1);
  4814. }
  4815. #endif
  4816. #ifdef SQLITE_VEC_DEBUG
  4817. printf("xBestIndex [%d] usable=%d iColumn=%d op=%d vtabin=%d\n", i,
  4818. pIdxInfo->aConstraint[i].usable, pIdxInfo->aConstraint[i].iColumn,
  4819. pIdxInfo->aConstraint[i].op, vtabIn);
  4820. #endif
  4821. if (!pIdxInfo->aConstraint[i].usable)
  4822. continue;
  4823. int iColumn = pIdxInfo->aConstraint[i].iColumn;
  4824. int op = pIdxInfo->aConstraint[i].op;
  4825. if (op == SQLITE_INDEX_CONSTRAINT_LIMIT) {
  4826. iLimitTerm = i;
  4827. }
  4828. if (op == SQLITE_INDEX_CONSTRAINT_MATCH &&
  4829. vec0_column_idx_is_vector(p, iColumn)) {
  4830. if (iMatchTerm > -1) {
  4831. vtab_set_error(
  4832. pVTab, "only 1 MATCH operator is allowed in a single vec0 query");
  4833. return SQLITE_ERROR;
  4834. }
  4835. iMatchTerm = i;
  4836. iMatchVectorTerm = vec0_column_idx_to_vector_idx(p, iColumn);
  4837. }
  4838. if (op == SQLITE_INDEX_CONSTRAINT_EQ && iColumn == VEC0_COLUMN_ID) {
  4839. if (vtabIn) {
  4840. if (iRowidInTerm != -1) {
  4841. vtab_set_error(pVTab, "only 1 'rowid in (..)' operator is allowed in "
  4842. "a single vec0 query");
  4843. return SQLITE_ERROR;
  4844. }
  4845. iRowidInTerm = i;
  4846. } else {
  4847. iRowidTerm = i;
  4848. }
  4849. }
  4850. if (op == SQLITE_INDEX_CONSTRAINT_EQ && iColumn == vec0_column_k_idx(p)) {
  4851. iKTerm = i;
  4852. }
  4853. if(
  4854. (op != SQLITE_INDEX_CONSTRAINT_LIMIT && op != SQLITE_INDEX_CONSTRAINT_OFFSET)
  4855. && vec0_column_idx_is_auxiliary(p, iColumn)) {
  4856. hasAuxConstraint = 1;
  4857. }
  4858. }
  4859. sqlite3_str *idxStr = sqlite3_str_new(NULL);
  4860. int rc;
  4861. if (iMatchTerm >= 0) {
  4862. if (iLimitTerm < 0 && iKTerm < 0) {
  4863. vtab_set_error(
  4864. pVTab,
  4865. "A LIMIT or 'k = ?' constraint is required on vec0 knn queries.");
  4866. rc = SQLITE_ERROR;
  4867. goto done;
  4868. }
  4869. if (iLimitTerm >= 0 && iKTerm >= 0) {
  4870. vtab_set_error(pVTab, "Only LIMIT or 'k =?' can be provided, not both");
  4871. rc = SQLITE_ERROR;
  4872. goto done;
  4873. }
  4874. if (pIdxInfo->nOrderBy) {
  4875. if (pIdxInfo->nOrderBy > 1) {
  4876. vtab_set_error(pVTab, "Only a single 'ORDER BY distance' clause is "
  4877. "allowed on vec0 KNN queries");
  4878. rc = SQLITE_ERROR;
  4879. goto done;
  4880. }
  4881. if (pIdxInfo->aOrderBy[0].iColumn != vec0_column_distance_idx(p)) {
  4882. vtab_set_error(pVTab,
  4883. "Only a single 'ORDER BY distance' clause is allowed on "
  4884. "vec0 KNN queries, not on other columns");
  4885. rc = SQLITE_ERROR;
  4886. goto done;
  4887. }
  4888. if (pIdxInfo->aOrderBy[0].desc) {
  4889. vtab_set_error(
  4890. pVTab, "Only ascending in ORDER BY distance clause is supported, "
  4891. "DESC is not supported yet.");
  4892. rc = SQLITE_ERROR;
  4893. goto done;
  4894. }
  4895. }
  4896. if(hasAuxConstraint) {
  4897. // IMP: V25623_09693
  4898. vtab_set_error(pVTab, "An illegal WHERE constraint was provided on a vec0 auxiliary column in a KNN query.");
  4899. rc = SQLITE_ERROR;
  4900. goto done;
  4901. }
  4902. sqlite3_str_appendchar(idxStr, 1, VEC0_QUERY_PLAN_KNN);
  4903. int argvIndex = 1;
  4904. pIdxInfo->aConstraintUsage[iMatchTerm].argvIndex = argvIndex++;
  4905. pIdxInfo->aConstraintUsage[iMatchTerm].omit = 1;
  4906. sqlite3_str_appendchar(idxStr, 1, VEC0_IDXSTR_KIND_KNN_MATCH);
  4907. sqlite3_str_appendchar(idxStr, 3, '_');
  4908. if (iLimitTerm >= 0) {
  4909. pIdxInfo->aConstraintUsage[iLimitTerm].argvIndex = argvIndex++;
  4910. pIdxInfo->aConstraintUsage[iLimitTerm].omit = 1;
  4911. } else {
  4912. pIdxInfo->aConstraintUsage[iKTerm].argvIndex = argvIndex++;
  4913. pIdxInfo->aConstraintUsage[iKTerm].omit = 1;
  4914. }
  4915. sqlite3_str_appendchar(idxStr, 1, VEC0_IDXSTR_KIND_KNN_K);
  4916. sqlite3_str_appendchar(idxStr, 3, '_');
  4917. #if COMPILER_SUPPORTS_VTAB_IN
  4918. if (iRowidInTerm >= 0) {
  4919. // already validated as >= SQLite 3.38 bc iRowidInTerm is only >= 0 when
  4920. // vtabIn == 1
  4921. sqlite3_vtab_in(pIdxInfo, iRowidInTerm, 1);
  4922. pIdxInfo->aConstraintUsage[iRowidInTerm].argvIndex = argvIndex++;
  4923. pIdxInfo->aConstraintUsage[iRowidInTerm].omit = 1;
  4924. sqlite3_str_appendchar(idxStr, 1, VEC0_IDXSTR_KIND_KNN_ROWID_IN);
  4925. sqlite3_str_appendchar(idxStr, 3, '_');
  4926. }
  4927. #endif
  4928. for (int i = 0; i < pIdxInfo->nConstraint; i++) {
  4929. if (!pIdxInfo->aConstraint[i].usable)
  4930. continue;
  4931. int iColumn = pIdxInfo->aConstraint[i].iColumn;
  4932. int op = pIdxInfo->aConstraint[i].op;
  4933. if(op == SQLITE_INDEX_CONSTRAINT_LIMIT || op == SQLITE_INDEX_CONSTRAINT_OFFSET) {
  4934. continue;
  4935. }
  4936. if(!vec0_column_idx_is_partition(p, iColumn)) {
  4937. continue;
  4938. }
  4939. int partition_idx = vec0_column_idx_to_partition_idx(p, iColumn);
  4940. char value = 0;
  4941. switch(op) {
  4942. case SQLITE_INDEX_CONSTRAINT_EQ: {
  4943. value = VEC0_PARTITION_OPERATOR_EQ;
  4944. break;
  4945. }
  4946. case SQLITE_INDEX_CONSTRAINT_GT: {
  4947. value = VEC0_PARTITION_OPERATOR_GT;
  4948. break;
  4949. }
  4950. case SQLITE_INDEX_CONSTRAINT_LE: {
  4951. value = VEC0_PARTITION_OPERATOR_LE;
  4952. break;
  4953. }
  4954. case SQLITE_INDEX_CONSTRAINT_LT: {
  4955. value = VEC0_PARTITION_OPERATOR_LT;
  4956. break;
  4957. }
  4958. case SQLITE_INDEX_CONSTRAINT_GE: {
  4959. value = VEC0_PARTITION_OPERATOR_GE;
  4960. break;
  4961. }
  4962. case SQLITE_INDEX_CONSTRAINT_NE: {
  4963. value = VEC0_PARTITION_OPERATOR_NE;
  4964. break;
  4965. }
  4966. }
  4967. if(value) {
  4968. pIdxInfo->aConstraintUsage[i].argvIndex = argvIndex++;
  4969. pIdxInfo->aConstraintUsage[i].omit = 1;
  4970. sqlite3_str_appendchar(idxStr, 1, VEC0_IDXSTR_KIND_KNN_PARTITON_CONSTRAINT);
  4971. sqlite3_str_appendchar(idxStr, 1, 'A' + partition_idx);
  4972. sqlite3_str_appendchar(idxStr, 1, value);
  4973. sqlite3_str_appendchar(idxStr, 1, '_');
  4974. }
  4975. }
  4976. for (int i = 0; i < pIdxInfo->nConstraint; i++) {
  4977. if (!pIdxInfo->aConstraint[i].usable)
  4978. continue;
  4979. int iColumn = pIdxInfo->aConstraint[i].iColumn;
  4980. int op = pIdxInfo->aConstraint[i].op;
  4981. if(op == SQLITE_INDEX_CONSTRAINT_LIMIT || op == SQLITE_INDEX_CONSTRAINT_OFFSET) {
  4982. continue;
  4983. }
  4984. if(!vec0_column_idx_is_metadata(p, iColumn)) {
  4985. continue;
  4986. }
  4987. int metadata_idx = vec0_column_idx_to_metadata_idx(p, iColumn);
  4988. char value = 0;
  4989. switch(op) {
  4990. case SQLITE_INDEX_CONSTRAINT_EQ: {
  4991. int vtabIn = 0;
  4992. #if COMPILER_SUPPORTS_VTAB_IN
  4993. if (sqlite3_libversion_number() >= 3038000) {
  4994. vtabIn = sqlite3_vtab_in(pIdxInfo, i, -1);
  4995. }
  4996. if(vtabIn) {
  4997. switch(p->metadata_columns[metadata_idx].kind) {
  4998. case VEC0_METADATA_COLUMN_KIND_FLOAT:
  4999. case VEC0_METADATA_COLUMN_KIND_BOOLEAN: {
  5000. // IMP: V15248_32086
  5001. rc = SQLITE_ERROR;
  5002. vtab_set_error(pVTab, "'xxx in (...)' is only available on INTEGER or TEXT metadata columns.");
  5003. goto done;
  5004. break;
  5005. }
  5006. case VEC0_METADATA_COLUMN_KIND_INTEGER:
  5007. case VEC0_METADATA_COLUMN_KIND_TEXT: {
  5008. break;
  5009. }
  5010. }
  5011. value = VEC0_METADATA_OPERATOR_IN;
  5012. sqlite3_vtab_in(pIdxInfo, i, 1);
  5013. }else
  5014. #endif
  5015. {
  5016. value = VEC0_PARTITION_OPERATOR_EQ;
  5017. }
  5018. break;
  5019. }
  5020. case SQLITE_INDEX_CONSTRAINT_GT: {
  5021. value = VEC0_METADATA_OPERATOR_GT;
  5022. break;
  5023. }
  5024. case SQLITE_INDEX_CONSTRAINT_LE: {
  5025. value = VEC0_METADATA_OPERATOR_LE;
  5026. break;
  5027. }
  5028. case SQLITE_INDEX_CONSTRAINT_LT: {
  5029. value = VEC0_METADATA_OPERATOR_LT;
  5030. break;
  5031. }
  5032. case SQLITE_INDEX_CONSTRAINT_GE: {
  5033. value = VEC0_METADATA_OPERATOR_GE;
  5034. break;
  5035. }
  5036. case SQLITE_INDEX_CONSTRAINT_NE: {
  5037. value = VEC0_METADATA_OPERATOR_NE;
  5038. break;
  5039. }
  5040. default: {
  5041. // IMP: V16511_00582
  5042. rc = SQLITE_ERROR;
  5043. vtab_set_error(pVTab,
  5044. "An illegal WHERE constraint was provided on a vec0 metadata column in a KNN query. "
  5045. "Only one of EQUALS, GREATER_THAN, LESS_THAN_OR_EQUAL, LESS_THAN, GREATER_THAN_OR_EQUAL, NOT_EQUALS is allowed."
  5046. );
  5047. goto done;
  5048. }
  5049. }
  5050. if(p->metadata_columns[metadata_idx].kind == VEC0_METADATA_COLUMN_KIND_BOOLEAN) {
  5051. if(!(value == VEC0_METADATA_OPERATOR_EQ || value == VEC0_METADATA_OPERATOR_NE)) {
  5052. // IMP: V10145_26984
  5053. rc = SQLITE_ERROR;
  5054. vtab_set_error(pVTab, "ONLY EQUALS (=) or NOT_EQUALS (!=) operators are allowed on boolean metadata columns.");
  5055. goto done;
  5056. }
  5057. }
  5058. pIdxInfo->aConstraintUsage[i].argvIndex = argvIndex++;
  5059. pIdxInfo->aConstraintUsage[i].omit = 1;
  5060. sqlite3_str_appendchar(idxStr, 1, VEC0_IDXSTR_KIND_METADATA_CONSTRAINT);
  5061. sqlite3_str_appendchar(idxStr, 1, 'A' + metadata_idx);
  5062. sqlite3_str_appendchar(idxStr, 1, value);
  5063. sqlite3_str_appendchar(idxStr, 1, '_');
  5064. }
  5065. pIdxInfo->idxNum = iMatchVectorTerm;
  5066. pIdxInfo->estimatedCost = 30.0;
  5067. pIdxInfo->estimatedRows = 10;
  5068. } else if (iRowidTerm >= 0) {
  5069. sqlite3_str_appendchar(idxStr, 1, VEC0_QUERY_PLAN_POINT);
  5070. pIdxInfo->aConstraintUsage[iRowidTerm].argvIndex = 1;
  5071. pIdxInfo->aConstraintUsage[iRowidTerm].omit = 1;
  5072. sqlite3_str_appendchar(idxStr, 1, VEC0_IDXSTR_KIND_POINT_ID);
  5073. sqlite3_str_appendchar(idxStr, 3, '_');
  5074. pIdxInfo->idxNum = pIdxInfo->colUsed;
  5075. pIdxInfo->estimatedCost = 10.0;
  5076. pIdxInfo->estimatedRows = 1;
  5077. } else {
  5078. sqlite3_str_appendchar(idxStr, 1, VEC0_QUERY_PLAN_FULLSCAN);
  5079. pIdxInfo->estimatedCost = 3000000.0;
  5080. pIdxInfo->estimatedRows = 100000;
  5081. }
  5082. pIdxInfo->idxStr = sqlite3_str_finish(idxStr);
  5083. idxStr = NULL;
  5084. if (!pIdxInfo->idxStr) {
  5085. rc = SQLITE_OK;
  5086. goto done;
  5087. }
  5088. pIdxInfo->needToFreeIdxStr = 1;
  5089. rc = SQLITE_OK;
  5090. done:
  5091. if(idxStr) {
  5092. sqlite3_str_finish(idxStr);
  5093. }
  5094. return rc;
  5095. }
// forward declaration because vec0Filter uses it
  5097. static int vec0Next(sqlite3_vtab_cursor *cur);
  5098. void merge_sorted_lists(f32 *a, i64 *a_rowids, i64 a_length, f32 *b,
  5099. i64 *b_rowids, i32 *b_top_idxs, i64 b_length, f32 *out,
  5100. i64 *out_rowids, i64 out_length, i64 *out_used) {
  5101. // assert((a_length >= out_length) || (b_length >= out_length));
  5102. i64 ptrA = 0;
  5103. i64 ptrB = 0;
  5104. for (int i = 0; i < out_length; i++) {
  5105. if ((ptrA >= a_length) && (ptrB >= b_length)) {
  5106. *out_used = i;
  5107. return;
  5108. }
  5109. if (ptrA >= a_length) {
  5110. out[i] = b[b_top_idxs[ptrB]];
  5111. out_rowids[i] = b_rowids[b_top_idxs[ptrB]];
  5112. ptrB++;
  5113. } else if (ptrB >= b_length) {
  5114. out[i] = a[ptrA];
  5115. out_rowids[i] = a_rowids[ptrA];
  5116. ptrA++;
  5117. } else {
  5118. if (a[ptrA] <= b[b_top_idxs[ptrB]]) {
  5119. out[i] = a[ptrA];
  5120. out_rowids[i] = a_rowids[ptrA];
  5121. ptrA++;
  5122. } else {
  5123. out[i] = b[b_top_idxs[ptrB]];
  5124. out_rowids[i] = b_rowids[b_top_idxs[ptrB]];
  5125. ptrB++;
  5126. }
  5127. }
  5128. }
  5129. *out_used = out_length;
  5130. }
  5131. u8 *bitmap_new(i32 n) {
  5132. assert(n % 8 == 0);
  5133. u8 *p = sqlite3_malloc(n * sizeof(u8) / CHAR_BIT);
  5134. if (p) {
  5135. memset(p, 0, n * sizeof(u8) / CHAR_BIT);
  5136. }
  5137. return p;
  5138. }
  5139. u8 *bitmap_new_from(i32 n, u8 *from) {
  5140. assert(n % 8 == 0);
  5141. u8 *p = sqlite3_malloc(n * sizeof(u8) / CHAR_BIT);
  5142. if (p) {
  5143. memcpy(p, from, n / CHAR_BIT);
  5144. }
  5145. return p;
  5146. }
  5147. void bitmap_copy(u8 *base, u8 *from, i32 n) {
  5148. assert(n % 8 == 0);
  5149. memcpy(base, from, n / CHAR_BIT);
  5150. }
  5151. void bitmap_and_inplace(u8 *base, u8 *other, i32 n) {
  5152. assert((n % 8) == 0);
  5153. for (int i = 0; i < n / CHAR_BIT; i++) {
  5154. base[i] = base[i] & other[i];
  5155. }
  5156. }
  5157. void bitmap_set(u8 *bitmap, i32 position, int value) {
  5158. if (value) {
  5159. bitmap[position / CHAR_BIT] |= 1 << (position % CHAR_BIT);
  5160. } else {
  5161. bitmap[position / CHAR_BIT] &= ~(1 << (position % CHAR_BIT));
  5162. }
  5163. }
  5164. int bitmap_get(u8 *bitmap, i32 position) {
  5165. return (((bitmap[position / CHAR_BIT]) >> (position % CHAR_BIT)) & 1);
  5166. }
  5167. void bitmap_clear(u8 *bitmap, i32 n) {
  5168. assert((n % 8) == 0);
  5169. memset(bitmap, 0, n / CHAR_BIT);
  5170. }
  5171. void bitmap_fill(u8 *bitmap, i32 n) {
  5172. assert((n % 8) == 0);
  5173. memset(bitmap, 0xFF, n / CHAR_BIT);
  5174. }
  5175. /**
  5176. * @brief Finds the minimum k items in distances, and writes the indicies to
  5177. * out.
  5178. *
  5179. * @param distances input f32 array of size n, the items to consider.
  5180. * @param n: size of distances array.
  5181. * @param out: Output array of size k, will contain at most k element indicies
  5182. * @param k: Size of output array
  5183. * @return int
  5184. */
  5185. int min_idx(const f32 *distances, i32 n, u8 *candidates, i32 *out, i32 k,
  5186. u8 *bTaken, i32 *k_used) {
  5187. assert(k > 0);
  5188. assert(k <= n);
  5189. bitmap_clear(bTaken, n);
  5190. for (int ik = 0; ik < k; ik++) {
  5191. int min_idx = 0;
  5192. while (min_idx < n &&
  5193. (bitmap_get(bTaken, min_idx) || !bitmap_get(candidates, min_idx))) {
  5194. min_idx++;
  5195. }
  5196. if (min_idx >= n) {
  5197. *k_used = ik;
  5198. return SQLITE_OK;
  5199. }
  5200. for (int i = 0; i < n; i++) {
  5201. if (distances[i] <= distances[min_idx] && !bitmap_get(bTaken, i) &&
  5202. (bitmap_get(candidates, i))) {
  5203. min_idx = i;
  5204. }
  5205. }
  5206. out[ik] = min_idx;
  5207. bitmap_set(bTaken, min_idx, 1);
  5208. }
  5209. *k_used = k;
  5210. return SQLITE_OK;
  5211. }
  5212. int vec0_get_metadata_text_long_value(
  5213. vec0_vtab * p,
  5214. sqlite3_stmt ** stmt,
  5215. int metadata_idx,
  5216. i64 rowid,
  5217. int *n,
  5218. char ** s) {
  5219. int rc;
  5220. if(!(*stmt)) {
  5221. const char * zSql = sqlite3_mprintf("select data from " VEC0_SHADOW_METADATA_TEXT_DATA_NAME " where rowid = ?", p->schemaName, p->tableName, metadata_idx);
  5222. if(!zSql) {
  5223. rc = SQLITE_NOMEM;
  5224. goto done;
  5225. }
  5226. rc = sqlite3_prepare_v2(p->db, zSql, -1, stmt, NULL);
  5227. sqlite3_free( (void *) zSql);
  5228. if(rc != SQLITE_OK) {
  5229. goto done;
  5230. }
  5231. }
  5232. sqlite3_reset(*stmt);
  5233. sqlite3_bind_int64(*stmt, 1, rowid);
  5234. rc = sqlite3_step(*stmt);
  5235. if(rc != SQLITE_ROW) {
  5236. rc = SQLITE_ERROR;
  5237. goto done;
  5238. }
  5239. *s = (char *) sqlite3_column_text(*stmt, 0);
  5240. *n = sqlite3_column_bytes(*stmt, 0);
  5241. rc = SQLITE_OK;
  5242. done:
  5243. return rc;
  5244. }
  5245. /**
  5246. * @brief Crete at "iterator" (sqlite3_stmt) of chunks with the given constraints
  5247. *
  5248. * Any VEC0_IDXSTR_KIND_KNN_PARTITON_CONSTRAINT values in idxStr/argv will be applied
  5249. * as WHERE constraints in the underlying stmt SQL, and any consumer of the stmt
  5250. * can freely step through the stmt with all constraints satisfied.
  5251. *
  5252. * @param p - vec0_vtab
  5253. * @param idxStr - the xBestIndex/xFilter idxstr containing VEC0_IDXSTR values
  5254. * @param argc - number of argv values from xFilter
  5255. * @param argv - array of sqlite3_value from xFilter
  5256. * @param outStmt - output sqlite3_stmt of chunks with all filters applied
  5257. * @return int SQLITE_OK on success, error code otherwise
  5258. */
  5259. int vec0_chunks_iter(vec0_vtab * p, const char * idxStr, int argc, sqlite3_value ** argv, sqlite3_stmt** outStmt) {
  5260. // always null terminated, enforced by SQLite
  5261. int idxStrLength = strlen(idxStr);
  5262. // "1" refers to the initial vec0_query_plan char, 4 is the number of chars per "element"
  5263. int numValueEntries = (idxStrLength-1) / 4;
  5264. assert(argc == numValueEntries);
  5265. int rc;
  5266. sqlite3_str * s = sqlite3_str_new(NULL);
  5267. sqlite3_str_appendf(s, "select chunk_id, validity, rowids "
  5268. " from " VEC0_SHADOW_CHUNKS_NAME,
  5269. p->schemaName, p->tableName);
  5270. int appendedWhere = 0;
  5271. for(int i = 0; i < numValueEntries; i++) {
  5272. int idx = 1 + (i * 4);
  5273. char kind = idxStr[idx + 0];
  5274. if(kind != VEC0_IDXSTR_KIND_KNN_PARTITON_CONSTRAINT) {
  5275. continue;
  5276. }
  5277. int partition_idx = idxStr[idx + 1] - 'A';
  5278. int operator = idxStr[idx + 2];
  5279. // idxStr[idx + 3] is just null, a '_' placeholder
  5280. if(!appendedWhere) {
  5281. sqlite3_str_appendall(s, " WHERE ");
  5282. appendedWhere = 1;
  5283. }else {
  5284. sqlite3_str_appendall(s, " AND ");
  5285. }
  5286. switch(operator) {
  5287. case VEC0_PARTITION_OPERATOR_EQ:
  5288. sqlite3_str_appendf(s, " partition%02d = ? ", partition_idx);
  5289. break;
  5290. case VEC0_PARTITION_OPERATOR_GT:
  5291. sqlite3_str_appendf(s, " partition%02d > ? ", partition_idx);
  5292. break;
  5293. case VEC0_PARTITION_OPERATOR_LE:
  5294. sqlite3_str_appendf(s, " partition%02d <= ? ", partition_idx);
  5295. break;
  5296. case VEC0_PARTITION_OPERATOR_LT:
  5297. sqlite3_str_appendf(s, " partition%02d < ? ", partition_idx);
  5298. break;
  5299. case VEC0_PARTITION_OPERATOR_GE:
  5300. sqlite3_str_appendf(s, " partition%02d >= ? ", partition_idx);
  5301. break;
  5302. case VEC0_PARTITION_OPERATOR_NE:
  5303. sqlite3_str_appendf(s, " partition%02d != ? ", partition_idx);
  5304. break;
  5305. default: {
  5306. char * zSql = sqlite3_str_finish(s);
  5307. sqlite3_free(zSql);
  5308. return SQLITE_ERROR;
  5309. }
  5310. }
  5311. }
  5312. char *zSql = sqlite3_str_finish(s);
  5313. if (!zSql) {
  5314. return SQLITE_NOMEM;
  5315. }
  5316. rc = sqlite3_prepare_v2(p->db, zSql, -1, outStmt, NULL);
  5317. sqlite3_free(zSql);
  5318. if(rc != SQLITE_OK) {
  5319. return rc;
  5320. }
  5321. int n = 1;
  5322. for(int i = 0; i < numValueEntries; i++) {
  5323. int idx = 1 + (i * 4);
  5324. char kind = idxStr[idx + 0];
  5325. if(kind != VEC0_IDXSTR_KIND_KNN_PARTITON_CONSTRAINT) {
  5326. continue;
  5327. }
  5328. sqlite3_bind_value(*outStmt, n++, argv[i]);
  5329. }
  5330. return rc;
  5331. }
// A single `xxx in (...)` constraint on a metadata column. TEXT or INTEGER only for now.
// The filter routines locate the entry for a constraint by comparing `argv_idx`
// against the argv index of the constraint being evaluated.
struct Vec0MetadataIn{
  // index `i` of the `argv[i]` value the constraint is on
  int argv_idx;
  // metadata column index of the constraint, derived from idxStr + argv_idx
  int metadata_idx;
  // array of the copied `(...)` values from sqlite3_vtab_in_first()/sqlite3_vtab_in_next().
  // The filter code reads the elements as i64 for INTEGER columns and as
  // struct Vec0MetadataInTextEntry for TEXT columns.
  struct Array array;
};
// Array element for the `xxx in (...)` values of a text column. Basically just a string.
struct Vec0MetadataInTextEntry {
  // length of zString; the text filter compares it against the stored text's
  // length before comparing any characters (presumably a byte count — confirm
  // where the entries are built)
  int n;
  // the string value itself; compared with strncmp() by the text filter
  char * zString;
};
  5346. int vec0_metadata_filter_text(vec0_vtab * p, sqlite3_value * value, const void * buffer, int size, vec0_metadata_operator op, u8* b, int metadata_idx, int chunk_rowid, struct Array * aMetadataIn, int argv_idx) {
  5347. int rc;
  5348. sqlite3_stmt * stmt = NULL;
  5349. i64 * rowids = NULL;
  5350. sqlite3_blob * rowidsBlob;
  5351. const char * sTarget = (const char *) sqlite3_value_text(value);
  5352. int nTarget = sqlite3_value_bytes(value);
  5353. // TODO(perf): only text metadata news the rowids BLOB. Make it so that
  5354. // rowids BLOB is re-used when multiple fitlers on text columns,
  5355. // ex "name BETWEEN 'a' and 'b'""
  5356. rc = sqlite3_blob_open(p->db, p->schemaName, p->shadowChunksName, "rowids", chunk_rowid, 0, &rowidsBlob);
  5357. if(rc != SQLITE_OK) {
  5358. return rc;
  5359. }
  5360. assert(sqlite3_blob_bytes(rowidsBlob) % sizeof(i64) == 0);
  5361. assert((sqlite3_blob_bytes(rowidsBlob) / sizeof(i64)) == size);
  5362. rowids = sqlite3_malloc(sqlite3_blob_bytes(rowidsBlob));
  5363. if(!rowids) {
  5364. sqlite3_blob_close(rowidsBlob);
  5365. return SQLITE_NOMEM;
  5366. }
  5367. rc = sqlite3_blob_read(rowidsBlob, rowids, sqlite3_blob_bytes(rowidsBlob), 0);
  5368. if(rc != SQLITE_OK) {
  5369. sqlite3_blob_close(rowidsBlob);
  5370. return rc;
  5371. }
  5372. sqlite3_blob_close(rowidsBlob);
  5373. switch(op) {
  5374. int nPrefix;
  5375. char * sPrefix;
  5376. char *sFull;
  5377. int nFull;
  5378. u8 * view;
  5379. case VEC0_METADATA_OPERATOR_EQ: {
  5380. for(int i = 0; i < size; i++) {
  5381. view = &((u8*) buffer)[i * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH];
  5382. nPrefix = ((int*) view)[0];
  5383. sPrefix = (char *) &view[4];
  5384. // for EQ the text lengths must match
  5385. if(nPrefix != nTarget) {
  5386. bitmap_set(b, i, 0);
  5387. continue;
  5388. }
  5389. int cmpPrefix = strncmp(sPrefix, sTarget, min(nPrefix, VEC0_METADATA_TEXT_VIEW_DATA_LENGTH));
  5390. // for short strings, use the prefix comparison direclty
  5391. if(nPrefix <= VEC0_METADATA_TEXT_VIEW_DATA_LENGTH) {
  5392. bitmap_set(b, i, cmpPrefix == 0);
  5393. continue;
  5394. }
  5395. // for EQ on longs strings, the prefix must match
  5396. if(cmpPrefix) {
  5397. bitmap_set(b, i, 0);
  5398. continue;
  5399. }
  5400. // consult the full string
  5401. rc = vec0_get_metadata_text_long_value(p, &stmt, metadata_idx, rowids[i], &nFull, &sFull);
  5402. if(rc != SQLITE_OK) {
  5403. goto done;
  5404. }
  5405. if(nPrefix != nFull) {
  5406. rc = SQLITE_ERROR;
  5407. goto done;
  5408. }
  5409. bitmap_set(b, i, strncmp(sFull, sTarget, nFull) == 0);
  5410. }
  5411. break;
  5412. }
  5413. case VEC0_METADATA_OPERATOR_NE: {
  5414. for(int i = 0; i < size; i++) {
  5415. view = &((u8*) buffer)[i * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH];
  5416. nPrefix = ((int*) view)[0];
  5417. sPrefix = (char *) &view[4];
  5418. // for NE if text lengths dont match, it never will
  5419. if(nPrefix != nTarget) {
  5420. bitmap_set(b, i, 1);
  5421. continue;
  5422. }
  5423. int cmpPrefix = strncmp(sPrefix, sTarget, min(nPrefix, VEC0_METADATA_TEXT_VIEW_DATA_LENGTH));
  5424. // for short strings, use the prefix comparison direclty
  5425. if(nPrefix <= VEC0_METADATA_TEXT_VIEW_DATA_LENGTH) {
  5426. bitmap_set(b, i, cmpPrefix != 0);
  5427. continue;
  5428. }
  5429. // for NE on longs strings, if prefixes dont match, then long string wont
  5430. if(cmpPrefix) {
  5431. bitmap_set(b, i, 1);
  5432. continue;
  5433. }
  5434. // consult the full string
  5435. rc = vec0_get_metadata_text_long_value(p, &stmt, metadata_idx, rowids[i], &nFull, &sFull);
  5436. if(rc != SQLITE_OK) {
  5437. goto done;
  5438. }
  5439. if(nPrefix != nFull) {
  5440. rc = SQLITE_ERROR;
  5441. goto done;
  5442. }
  5443. bitmap_set(b, i, strncmp(sFull, sTarget, nFull) != 0);
  5444. }
  5445. break;
  5446. }
  5447. case VEC0_METADATA_OPERATOR_GT: {
  5448. for(int i = 0; i < size; i++) {
  5449. view = &((u8*) buffer)[i * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH];
  5450. nPrefix = ((int*) view)[0];
  5451. sPrefix = (char *) &view[4];
  5452. int cmpPrefix = strncmp(sPrefix, sTarget, min(min(nPrefix, VEC0_METADATA_TEXT_VIEW_DATA_LENGTH), nTarget));
  5453. if(nPrefix < VEC0_METADATA_TEXT_VIEW_DATA_LENGTH) {
  5454. // if prefix match, check which is longer
  5455. if(cmpPrefix == 0) {
  5456. bitmap_set(b, i, nPrefix > nTarget);
  5457. }
  5458. else {
  5459. bitmap_set(b, i, cmpPrefix > 0);
  5460. }
  5461. continue;
  5462. }
  5463. // TODO(perf): may not need to compare full text in some cases
  5464. rc = vec0_get_metadata_text_long_value(p, &stmt, metadata_idx, rowids[i], &nFull, &sFull);
  5465. if(rc != SQLITE_OK) {
  5466. goto done;
  5467. }
  5468. if(nPrefix != nFull) {
  5469. rc = SQLITE_ERROR;
  5470. goto done;
  5471. }
  5472. bitmap_set(b, i, strncmp(sFull, sTarget, nFull) > 0);
  5473. }
  5474. break;
  5475. }
  5476. case VEC0_METADATA_OPERATOR_GE: {
  5477. for(int i = 0; i < size; i++) {
  5478. view = &((u8*) buffer)[i * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH];
  5479. nPrefix = ((int*) view)[0];
  5480. sPrefix = (char *) &view[4];
  5481. int cmpPrefix = strncmp(sPrefix, sTarget, min(min(nPrefix, VEC0_METADATA_TEXT_VIEW_DATA_LENGTH), nTarget));
  5482. if(nPrefix < VEC0_METADATA_TEXT_VIEW_DATA_LENGTH) {
  5483. // if prefix match, check which is longer
  5484. if(cmpPrefix == 0) {
  5485. bitmap_set(b, i, nPrefix >= nTarget);
  5486. }
  5487. else {
  5488. bitmap_set(b, i, cmpPrefix >= 0);
  5489. }
  5490. continue;
  5491. }
  5492. // TODO(perf): may not need to compare full text in some cases
  5493. rc = vec0_get_metadata_text_long_value(p, &stmt, metadata_idx, rowids[i], &nFull, &sFull);
  5494. if(rc != SQLITE_OK) {
  5495. goto done;
  5496. }
  5497. if(nPrefix != nFull) {
  5498. rc = SQLITE_ERROR;
  5499. goto done;
  5500. }
  5501. bitmap_set(b, i, strncmp(sFull, sTarget, nFull) >= 0);
  5502. }
  5503. break;
  5504. }
  5505. case VEC0_METADATA_OPERATOR_LE: {
  5506. for(int i = 0; i < size; i++) {
  5507. view = &((u8*) buffer)[i * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH];
  5508. nPrefix = ((int*) view)[0];
  5509. sPrefix = (char *) &view[4];
  5510. int cmpPrefix = strncmp(sPrefix, sTarget, min(min(nPrefix, VEC0_METADATA_TEXT_VIEW_DATA_LENGTH), nTarget));
  5511. if(nPrefix < VEC0_METADATA_TEXT_VIEW_DATA_LENGTH) {
  5512. // if prefix match, check which is longer
  5513. if(cmpPrefix == 0) {
  5514. bitmap_set(b, i, nPrefix <= nTarget);
  5515. }
  5516. else {
  5517. bitmap_set(b, i, cmpPrefix <= 0);
  5518. }
  5519. continue;
  5520. }
  5521. // TODO(perf): may not need to compare full text in some cases
  5522. rc = vec0_get_metadata_text_long_value(p, &stmt, metadata_idx, rowids[i], &nFull, &sFull);
  5523. if(rc != SQLITE_OK) {
  5524. goto done;
  5525. }
  5526. if(nPrefix != nFull) {
  5527. rc = SQLITE_ERROR;
  5528. goto done;
  5529. }
  5530. bitmap_set(b, i, strncmp(sFull, sTarget, nFull) <= 0);
  5531. }
  5532. break;
  5533. }
  5534. case VEC0_METADATA_OPERATOR_LT: {
  5535. for(int i = 0; i < size; i++) {
  5536. view = &((u8*) buffer)[i * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH];
  5537. nPrefix = ((int*) view)[0];
  5538. sPrefix = (char *) &view[4];
  5539. int cmpPrefix = strncmp(sPrefix, sTarget, min(min(nPrefix, VEC0_METADATA_TEXT_VIEW_DATA_LENGTH), nTarget));
  5540. if(nPrefix < VEC0_METADATA_TEXT_VIEW_DATA_LENGTH) {
  5541. // if prefix match, check which is longer
  5542. if(cmpPrefix == 0) {
  5543. bitmap_set(b, i, nPrefix < nTarget);
  5544. }
  5545. else {
  5546. bitmap_set(b, i, cmpPrefix < 0);
  5547. }
  5548. continue;
  5549. }
  5550. // TODO(perf): may not need to compare full text in some cases
  5551. rc = vec0_get_metadata_text_long_value(p, &stmt, metadata_idx, rowids[i], &nFull, &sFull);
  5552. if(rc != SQLITE_OK) {
  5553. goto done;
  5554. }
  5555. if(nPrefix != nFull) {
  5556. rc = SQLITE_ERROR;
  5557. goto done;
  5558. }
  5559. bitmap_set(b, i, strncmp(sFull, sTarget, nFull) < 0);
  5560. }
  5561. break;
  5562. }
  5563. case VEC0_METADATA_OPERATOR_IN: {
  5564. size_t metadataInIdx = -1;
  5565. for(size_t i = 0; i < aMetadataIn->length; i++) {
  5566. struct Vec0MetadataIn * metadataIn = &(((struct Vec0MetadataIn *) aMetadataIn->z)[i]);
  5567. if(metadataIn->argv_idx == argv_idx) {
  5568. metadataInIdx = i;
  5569. break;
  5570. }
  5571. }
  5572. if(metadataInIdx < 0) {
  5573. rc = SQLITE_ERROR;
  5574. goto done;
  5575. }
  5576. struct Vec0MetadataIn * metadataIn = &((struct Vec0MetadataIn *) aMetadataIn->z)[metadataInIdx];
  5577. struct Array * aTarget = &(metadataIn->array);
  5578. int nPrefix;
  5579. char * sPrefix;
  5580. char *sFull;
  5581. int nFull;
  5582. u8 * view;
  5583. for(int i = 0; i < size; i++) {
  5584. view = &((u8*) buffer)[i * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH];
  5585. nPrefix = ((int*) view)[0];
  5586. sPrefix = (char *) &view[4];
  5587. for(size_t target_idx = 0; target_idx < aTarget->length; target_idx++) {
  5588. struct Vec0MetadataInTextEntry * entry = &(((struct Vec0MetadataInTextEntry*)aTarget->z)[target_idx]);
  5589. if(entry->n != nPrefix) {
  5590. continue;
  5591. }
  5592. int cmpPrefix = strncmp(sPrefix, entry->zString, min(nPrefix, VEC0_METADATA_TEXT_VIEW_DATA_LENGTH));
  5593. if(nPrefix <= VEC0_METADATA_TEXT_VIEW_DATA_LENGTH) {
  5594. if(cmpPrefix == 0) {
  5595. bitmap_set(b, i, 1);
  5596. break;
  5597. }
  5598. continue;
  5599. }
  5600. if(cmpPrefix) {
  5601. continue;
  5602. }
  5603. rc = vec0_get_metadata_text_long_value(p, &stmt, metadata_idx, rowids[i], &nFull, &sFull);
  5604. if(rc != SQLITE_OK) {
  5605. goto done;
  5606. }
  5607. if(nPrefix != nFull) {
  5608. rc = SQLITE_ERROR;
  5609. goto done;
  5610. }
  5611. if(strncmp(sFull, entry->zString, nFull) == 0) {
  5612. bitmap_set(b, i, 1);
  5613. break;
  5614. }
  5615. }
  5616. }
  5617. break;
  5618. }
  5619. }
  5620. rc = SQLITE_OK;
  5621. done:
  5622. sqlite3_finalize(stmt);
  5623. sqlite3_free(rowids);
  5624. return rc;
  5625. }
  5626. /**
  5627. * @brief Fill in bitmap of chunk values, whether or not the values match a metadata constraint
  5628. *
  5629. * @param p vec0_vtab
  5630. * @param metadata_idx index of the metatadata column to perfrom constraints on
  5631. * @param value sqlite3_value of the constraints value
  5632. * @param blob sqlite3_blob that is already opened on the metdata column's shadow chunk table
  5633. * @param chunk_rowid rowid of the chunk to calculate on
  5634. * @param b pre-allocated and zero'd out bitmap to write results to
  5635. * @param size size of the chunk
  5636. * @return int SQLITE_OK on success, error code otherwise
  5637. */
  5638. int vec0_set_metadata_filter_bitmap(
  5639. vec0_vtab *p,
  5640. int metadata_idx,
  5641. vec0_metadata_operator op,
  5642. sqlite3_value * value,
  5643. sqlite3_blob * blob,
  5644. i64 chunk_rowid,
  5645. u8* b,
  5646. int size,
  5647. struct Array * aMetadataIn, int argv_idx) {
  5648. // TODO: shouldn't this skip in-valid entries from the chunk's validity bitmap?
  5649. int rc;
  5650. rc = sqlite3_blob_reopen(blob, chunk_rowid);
  5651. if(rc != SQLITE_OK) {
  5652. return rc;
  5653. }
  5654. vec0_metadata_column_kind kind = p->metadata_columns[metadata_idx].kind;
  5655. int szMatch = 0;
  5656. int blobSize = sqlite3_blob_bytes(blob);
  5657. switch(kind) {
  5658. case VEC0_METADATA_COLUMN_KIND_BOOLEAN: {
  5659. szMatch = blobSize == size / CHAR_BIT;
  5660. break;
  5661. }
  5662. case VEC0_METADATA_COLUMN_KIND_INTEGER: {
  5663. szMatch = blobSize == size * sizeof(i64);
  5664. break;
  5665. }
  5666. case VEC0_METADATA_COLUMN_KIND_FLOAT: {
  5667. szMatch = blobSize == size * sizeof(double);
  5668. break;
  5669. }
  5670. case VEC0_METADATA_COLUMN_KIND_TEXT: {
  5671. szMatch = blobSize == size * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH;
  5672. break;
  5673. }
  5674. }
  5675. if(!szMatch) {
  5676. return SQLITE_ERROR;
  5677. }
  5678. void * buffer = sqlite3_malloc(blobSize);
  5679. if(!buffer) {
  5680. return SQLITE_NOMEM;
  5681. }
  5682. rc = sqlite3_blob_read(blob, buffer, blobSize, 0);
  5683. if(rc != SQLITE_OK) {
  5684. goto done;
  5685. }
  5686. switch(kind) {
  5687. case VEC0_METADATA_COLUMN_KIND_BOOLEAN: {
  5688. int target = sqlite3_value_int(value);
  5689. if( (target && op == VEC0_METADATA_OPERATOR_EQ) || (!target && op == VEC0_METADATA_OPERATOR_NE)) {
  5690. for(int i = 0; i < size; i++) { bitmap_set(b, i, bitmap_get((u8*) buffer, i)); }
  5691. }
  5692. else {
  5693. for(int i = 0; i < size; i++) { bitmap_set(b, i, !bitmap_get((u8*) buffer, i)); }
  5694. }
  5695. break;
  5696. }
  5697. case VEC0_METADATA_COLUMN_KIND_INTEGER: {
  5698. i64 * array = (i64*) buffer;
  5699. i64 target = sqlite3_value_int64(value);
  5700. switch(op) {
  5701. case VEC0_METADATA_OPERATOR_EQ: {
  5702. for(int i = 0; i < size; i++) { bitmap_set(b, i, array[i] == target); }
  5703. break;
  5704. }
  5705. case VEC0_METADATA_OPERATOR_GT: {
  5706. for(int i = 0; i < size; i++) { bitmap_set(b, i, array[i] > target); }
  5707. break;
  5708. }
  5709. case VEC0_METADATA_OPERATOR_LE: {
  5710. for(int i = 0; i < size; i++) { bitmap_set(b, i, array[i] <= target); }
  5711. break;
  5712. }
  5713. case VEC0_METADATA_OPERATOR_LT: {
  5714. for(int i = 0; i < size; i++) { bitmap_set(b, i, array[i] < target); }
  5715. break;
  5716. }
  5717. case VEC0_METADATA_OPERATOR_GE: {
  5718. for(int i = 0; i < size; i++) { bitmap_set(b, i, array[i] >= target); }
  5719. break;
  5720. }
  5721. case VEC0_METADATA_OPERATOR_NE: {
  5722. for(int i = 0; i < size; i++) { bitmap_set(b, i, array[i] != target); }
  5723. break;
  5724. }
  5725. case VEC0_METADATA_OPERATOR_IN: {
  5726. int metadataInIdx = -1;
  5727. for(size_t i = 0; i < aMetadataIn->length; i++) {
  5728. struct Vec0MetadataIn * metadataIn = &((struct Vec0MetadataIn *) aMetadataIn->z)[i];
  5729. if(metadataIn->argv_idx == argv_idx) {
  5730. metadataInIdx = i;
  5731. break;
  5732. }
  5733. }
  5734. if(metadataInIdx < 0) {
  5735. rc = SQLITE_ERROR;
  5736. goto done;
  5737. }
  5738. struct Vec0MetadataIn * metadataIn = &((struct Vec0MetadataIn *) aMetadataIn->z)[metadataInIdx];
  5739. struct Array * aTarget = &(metadataIn->array);
  5740. for(int i = 0; i < size; i++) {
  5741. for(size_t target_idx = 0; target_idx < aTarget->length; target_idx++) {
  5742. if( ((i64*)aTarget->z)[target_idx] == array[i]) {
  5743. bitmap_set(b, i, 1);
  5744. break;
  5745. }
  5746. }
  5747. }
  5748. break;
  5749. }
  5750. }
  5751. break;
  5752. }
  5753. case VEC0_METADATA_COLUMN_KIND_FLOAT: {
  5754. double * array = (double*) buffer;
  5755. double target = sqlite3_value_double(value);
  5756. switch(op) {
  5757. case VEC0_METADATA_OPERATOR_EQ: {
  5758. for(int i = 0; i < size; i++) { bitmap_set(b, i, array[i] == target); }
  5759. break;
  5760. }
  5761. case VEC0_METADATA_OPERATOR_GT: {
  5762. for(int i = 0; i < size; i++) { bitmap_set(b, i, array[i] > target); }
  5763. break;
  5764. }
  5765. case VEC0_METADATA_OPERATOR_LE: {
  5766. for(int i = 0; i < size; i++) { bitmap_set(b, i, array[i] <= target); }
  5767. break;
  5768. }
  5769. case VEC0_METADATA_OPERATOR_LT: {
  5770. for(int i = 0; i < size; i++) { bitmap_set(b, i, array[i] < target); }
  5771. break;
  5772. }
  5773. case VEC0_METADATA_OPERATOR_GE: {
  5774. for(int i = 0; i < size; i++) { bitmap_set(b, i, array[i] >= target); }
  5775. break;
  5776. }
  5777. case VEC0_METADATA_OPERATOR_NE: {
  5778. for(int i = 0; i < size; i++) { bitmap_set(b, i, array[i] != target); }
  5779. break;
  5780. }
  5781. case VEC0_METADATA_OPERATOR_IN: {
  5782. // should never be reached
  5783. break;
  5784. }
  5785. }
  5786. break;
  5787. }
  5788. case VEC0_METADATA_COLUMN_KIND_TEXT: {
  5789. rc = vec0_metadata_filter_text(p, value, buffer, size, op, b, metadata_idx, chunk_rowid, aMetadataIn, argv_idx);
  5790. if(rc != SQLITE_OK) {
  5791. goto done;
  5792. }
  5793. break;
  5794. }
  5795. }
  5796. done:
  5797. sqlite3_free(buffer);
  5798. return rc;
  5799. }
  5800. int vec0Filter_knn_chunks_iter(vec0_vtab *p, sqlite3_stmt *stmtChunks,
  5801. struct VectorColumnDefinition *vector_column,
  5802. int vectorColumnIdx, struct Array *arrayRowidsIn,
  5803. struct Array * aMetadataIn,
  5804. const char * idxStr, int argc, sqlite3_value ** argv,
  5805. void *queryVector, i64 k, i64 **out_topk_rowids,
  5806. f32 **out_topk_distances, i64 *out_used) {
  5807. // for each chunk, get top min(k, chunk_size) rowid + distances to query vec.
  5808. // then reconcile all topk_chunks for a true top k.
  5809. // output only rowids + distances for now
  5810. int rc = SQLITE_OK;
  5811. sqlite3_blob *blobVectors = NULL;
  5812. void *baseVectors = NULL; // memory: chunk_size * dimensions * element_size
  5813. // OWNED BY CALLER ON SUCCESS
  5814. i64 *topk_rowids = NULL; // memory: k * 4
  5815. // OWNED BY CALLER ON SUCCESS
  5816. f32 *topk_distances = NULL; // memory: k * 4
  5817. i64 *tmp_topk_rowids = NULL; // memory: k * 4
  5818. f32 *tmp_topk_distances = NULL; // memory: k * 4
  5819. f32 *chunk_distances = NULL; // memory: chunk_size * 4
  5820. u8 *b = NULL; // memory: chunk_size / 8
  5821. u8 *bTaken = NULL; // memory: chunk_size / 8
  5822. i32 *chunk_topk_idxs = NULL; // memory: k * 4
  5823. u8 *bmRowids = NULL; // memory: chunk_size / 8
  5824. u8 *bmMetadata = NULL; // memory: chunk_size / 8
  5825. // // total: a lot???
  5826. // 6 * (k * 4) + (k * 2) + (chunk_size / 8) + (chunk_size * dimensions * 4)
  5827. topk_rowids = sqlite3_malloc(k * sizeof(i64));
  5828. if (!topk_rowids) {
  5829. rc = SQLITE_NOMEM;
  5830. goto cleanup;
  5831. }
  5832. memset(topk_rowids, 0, k * sizeof(i64));
  5833. topk_distances = sqlite3_malloc(k * sizeof(f32));
  5834. if (!topk_distances) {
  5835. rc = SQLITE_NOMEM;
  5836. goto cleanup;
  5837. }
  5838. memset(topk_distances, 0, k * sizeof(f32));
  5839. tmp_topk_rowids = sqlite3_malloc(k * sizeof(i64));
  5840. if (!tmp_topk_rowids) {
  5841. rc = SQLITE_NOMEM;
  5842. goto cleanup;
  5843. }
  5844. memset(tmp_topk_rowids, 0, k * sizeof(i64));
  5845. tmp_topk_distances = sqlite3_malloc(k * sizeof(f32));
  5846. if (!tmp_topk_distances) {
  5847. rc = SQLITE_NOMEM;
  5848. goto cleanup;
  5849. }
  5850. memset(tmp_topk_distances, 0, k * sizeof(f32));
  5851. i64 k_used = 0;
  5852. i64 baseVectorsSize = p->chunk_size * vector_column_byte_size(*vector_column);
  5853. baseVectors = sqlite3_malloc(baseVectorsSize);
  5854. if (!baseVectors) {
  5855. rc = SQLITE_NOMEM;
  5856. goto cleanup;
  5857. }
  5858. chunk_distances = sqlite3_malloc(p->chunk_size * sizeof(f32));
  5859. if (!chunk_distances) {
  5860. rc = SQLITE_NOMEM;
  5861. goto cleanup;
  5862. }
  5863. b = bitmap_new(p->chunk_size);
  5864. if (!b) {
  5865. rc = SQLITE_NOMEM;
  5866. goto cleanup;
  5867. }
  5868. bTaken = bitmap_new(p->chunk_size);
  5869. if (!bTaken) {
  5870. rc = SQLITE_NOMEM;
  5871. goto cleanup;
  5872. }
  5873. chunk_topk_idxs = sqlite3_malloc(k * sizeof(i32));
  5874. if (!chunk_topk_idxs) {
  5875. rc = SQLITE_NOMEM;
  5876. goto cleanup;
  5877. }
  5878. bmRowids = arrayRowidsIn ? bitmap_new(p->chunk_size) : NULL;
  5879. if (arrayRowidsIn && !bmRowids) {
  5880. rc = SQLITE_NOMEM;
  5881. goto cleanup;
  5882. }
  5883. sqlite3_blob * metadataBlobs[VEC0_MAX_METADATA_COLUMNS];
  5884. memset(metadataBlobs, 0, sizeof(sqlite3_blob*) * VEC0_MAX_METADATA_COLUMNS);
  5885. bmMetadata = bitmap_new(p->chunk_size);
  5886. if(!bmMetadata) {
  5887. rc = SQLITE_NOMEM;
  5888. goto cleanup;
  5889. }
  5890. int idxStrLength = strlen(idxStr);
  5891. int numValueEntries = (idxStrLength-1) / 4;
  5892. assert(numValueEntries == argc);
  5893. int hasMetadataFilters = 0;
  5894. for(int i = 0; i < argc; i++) {
  5895. int idx = 1 + (i * 4);
  5896. char kind = idxStr[idx + 0];
  5897. if(kind == VEC0_IDXSTR_KIND_METADATA_CONSTRAINT) {
  5898. hasMetadataFilters = 1;
  5899. break;
  5900. }
  5901. }
  5902. while (true) {
  5903. rc = sqlite3_step(stmtChunks);
  5904. if (rc == SQLITE_DONE) {
  5905. break;
  5906. }
  5907. if (rc != SQLITE_ROW) {
  5908. vtab_set_error(&p->base, "chunks iter error");
  5909. rc = SQLITE_ERROR;
  5910. goto cleanup;
  5911. }
  5912. memset(chunk_distances, 0, p->chunk_size * sizeof(f32));
  5913. memset(chunk_topk_idxs, 0, k * sizeof(i32));
  5914. bitmap_clear(b, p->chunk_size);
  5915. i64 chunk_id = sqlite3_column_int64(stmtChunks, 0);
  5916. unsigned char *chunkValidity =
  5917. (unsigned char *)sqlite3_column_blob(stmtChunks, 1);
  5918. i64 validitySize = sqlite3_column_bytes(stmtChunks, 1);
  5919. if (validitySize != p->chunk_size / CHAR_BIT) {
  5920. // IMP: V05271_22109
  5921. vtab_set_error(
  5922. &p->base,
  5923. "chunk validity size doesn't match - expected %lld, found %lld",
  5924. p->chunk_size / CHAR_BIT, validitySize);
  5925. rc = SQLITE_ERROR;
  5926. goto cleanup;
  5927. }
  5928. i64 *chunkRowids = (i64 *)sqlite3_column_blob(stmtChunks, 2);
  5929. i64 rowidsSize = sqlite3_column_bytes(stmtChunks, 2);
  5930. if (rowidsSize != p->chunk_size * sizeof(i64)) {
  5931. // IMP: V02796_19635
  5932. vtab_set_error(&p->base, "rowids size doesn't match");
  5933. vtab_set_error(
  5934. &p->base,
  5935. "chunk rowids size doesn't match - expected %lld, found %lld",
  5936. p->chunk_size * sizeof(i64), rowidsSize);
  5937. rc = SQLITE_ERROR;
  5938. goto cleanup;
  5939. }
  5940. // open the vector chunk blob for the current chunk
  5941. rc = sqlite3_blob_open(p->db, p->schemaName,
  5942. p->shadowVectorChunksNames[vectorColumnIdx],
  5943. "vectors", chunk_id, 0, &blobVectors);
  5944. if (rc != SQLITE_OK) {
  5945. vtab_set_error(&p->base, "could not open vectors blob for chunk %lld",
  5946. chunk_id);
  5947. rc = SQLITE_ERROR;
  5948. goto cleanup;
  5949. }
  5950. i64 currentBaseVectorsSize = sqlite3_blob_bytes(blobVectors);
  5951. i64 expectedBaseVectorsSize =
  5952. p->chunk_size * vector_column_byte_size(*vector_column);
  5953. if (currentBaseVectorsSize != expectedBaseVectorsSize) {
  5954. // IMP: V16465_00535
  5955. vtab_set_error(
  5956. &p->base,
  5957. "vectors blob size doesn't match - expected %lld, found %lld",
  5958. expectedBaseVectorsSize, currentBaseVectorsSize);
  5959. rc = SQLITE_ERROR;
  5960. goto cleanup;
  5961. }
  5962. rc = sqlite3_blob_read(blobVectors, baseVectors, currentBaseVectorsSize, 0);
  5963. if (rc != SQLITE_OK) {
  5964. vtab_set_error(&p->base, "vectors blob read error for %lld", chunk_id);
  5965. rc = SQLITE_ERROR;
  5966. goto cleanup;
  5967. }
  5968. bitmap_copy(b, chunkValidity, p->chunk_size);
  5969. if (arrayRowidsIn) {
  5970. bitmap_clear(bmRowids, p->chunk_size);
  5971. for (int i = 0; i < p->chunk_size; i++) {
  5972. if (!bitmap_get(chunkValidity, i)) {
  5973. continue;
  5974. }
  5975. i64 rowid = chunkRowids[i];
  5976. void *in = bsearch(&rowid, arrayRowidsIn->z, arrayRowidsIn->length,
  5977. sizeof(i64), _cmp);
  5978. bitmap_set(bmRowids, i, in ? 1 : 0);
  5979. }
  5980. bitmap_and_inplace(b, bmRowids, p->chunk_size);
  5981. }
  5982. if(hasMetadataFilters) {
  5983. for(int i = 0; i < argc; i++) {
  5984. int idx = 1 + (i * 4);
  5985. char kind = idxStr[idx + 0];
  5986. if(kind != VEC0_IDXSTR_KIND_METADATA_CONSTRAINT) {
  5987. continue;
  5988. }
  5989. int metadata_idx = idxStr[idx + 1] - 'A';
  5990. int operator = idxStr[idx + 2];
  5991. if(!metadataBlobs[metadata_idx]) {
  5992. rc = sqlite3_blob_open(p->db, p->schemaName, p->shadowMetadataChunksNames[metadata_idx], "data", chunk_id, 0, &metadataBlobs[metadata_idx]);
  5993. vtab_set_error(&p->base, "Could not open metadata blob");
  5994. if(rc != SQLITE_OK) {
  5995. goto cleanup;
  5996. }
  5997. }
  5998. bitmap_clear(bmMetadata, p->chunk_size);
  5999. rc = vec0_set_metadata_filter_bitmap(p, metadata_idx, operator, argv[i], metadataBlobs[metadata_idx], chunk_id, bmMetadata, p->chunk_size, aMetadataIn, i);
  6000. if(rc != SQLITE_OK) {
  6001. vtab_set_error(&p->base, "Could not filter metadata fields");
  6002. if(rc != SQLITE_OK) {
  6003. goto cleanup;
  6004. }
  6005. }
  6006. bitmap_and_inplace(b, bmMetadata, p->chunk_size);
  6007. }
  6008. }
  6009. for (int i = 0; i < p->chunk_size; i++) {
  6010. if (!bitmap_get(b, i)) {
  6011. continue;
  6012. };
  6013. f32 result;
  6014. switch (vector_column->element_type) {
  6015. case SQLITE_VEC_ELEMENT_TYPE_FLOAT32: {
  6016. const f32 *base_i =
  6017. ((f32 *)baseVectors) + (i * vector_column->dimensions);
  6018. switch (vector_column->distance_metric) {
  6019. case VEC0_DISTANCE_METRIC_L2: {
  6020. result = distance_l2_sqr_float(base_i, (f32 *)queryVector,
  6021. &vector_column->dimensions);
  6022. break;
  6023. }
  6024. case VEC0_DISTANCE_METRIC_L1: {
  6025. result = distance_l1_f32(base_i, (f32 *)queryVector,
  6026. &vector_column->dimensions);
  6027. break;
  6028. }
  6029. case VEC0_DISTANCE_METRIC_COSINE: {
  6030. result = distance_cosine_float(base_i, (f32 *)queryVector,
  6031. &vector_column->dimensions);
  6032. break;
  6033. }
  6034. }
  6035. break;
  6036. }
  6037. case SQLITE_VEC_ELEMENT_TYPE_INT8: {
  6038. const i8 *base_i =
  6039. ((i8 *)baseVectors) + (i * vector_column->dimensions);
  6040. switch (vector_column->distance_metric) {
  6041. case VEC0_DISTANCE_METRIC_L2: {
  6042. result = distance_l2_sqr_int8(base_i, (i8 *)queryVector,
  6043. &vector_column->dimensions);
  6044. break;
  6045. }
  6046. case VEC0_DISTANCE_METRIC_L1: {
  6047. result = distance_l1_int8(base_i, (i8 *)queryVector,
  6048. &vector_column->dimensions);
  6049. break;
  6050. }
  6051. case VEC0_DISTANCE_METRIC_COSINE: {
  6052. result = distance_cosine_int8(base_i, (i8 *)queryVector,
  6053. &vector_column->dimensions);
  6054. break;
  6055. }
  6056. }
  6057. break;
  6058. }
  6059. case SQLITE_VEC_ELEMENT_TYPE_BIT: {
  6060. const u8 *base_i =
  6061. ((u8 *)baseVectors) + (i * (vector_column->dimensions / CHAR_BIT));
  6062. result = distance_hamming(base_i, (u8 *)queryVector,
  6063. &vector_column->dimensions);
  6064. break;
  6065. }
  6066. }
  6067. chunk_distances[i] = result;
  6068. }
  6069. int used1;
  6070. min_idx(chunk_distances, p->chunk_size, b, chunk_topk_idxs,
  6071. min(k, p->chunk_size), bTaken, &used1);
  6072. i64 used;
  6073. merge_sorted_lists(topk_distances, topk_rowids, k_used, chunk_distances,
  6074. chunkRowids, chunk_topk_idxs,
  6075. min(min(k, p->chunk_size), used1), tmp_topk_distances,
  6076. tmp_topk_rowids, k, &used);
  6077. for (int i = 0; i < used; i++) {
  6078. topk_rowids[i] = tmp_topk_rowids[i];
  6079. topk_distances[i] = tmp_topk_distances[i];
  6080. }
  6081. k_used = used;
  6082. // blobVectors is always opened with read-only permissions, so this never
  6083. // fails.
  6084. sqlite3_blob_close(blobVectors);
  6085. blobVectors = NULL;
  6086. }
  6087. *out_topk_rowids = topk_rowids;
  6088. *out_topk_distances = topk_distances;
  6089. *out_used = k_used;
  6090. rc = SQLITE_OK;
  6091. cleanup:
  6092. if (rc != SQLITE_OK) {
  6093. sqlite3_free(topk_rowids);
  6094. sqlite3_free(topk_distances);
  6095. }
  6096. sqlite3_free(chunk_topk_idxs);
  6097. sqlite3_free(tmp_topk_rowids);
  6098. sqlite3_free(tmp_topk_distances);
  6099. sqlite3_free(b);
  6100. sqlite3_free(bTaken);
  6101. sqlite3_free(bmRowids);
  6102. sqlite3_free(baseVectors);
  6103. sqlite3_free(chunk_distances);
  6104. sqlite3_free(bmMetadata);
  6105. for(int i = 0; i < VEC0_MAX_METADATA_COLUMNS; i++) {
  6106. sqlite3_blob_close(metadataBlobs[i]);
  6107. }
  6108. // blobVectors is always opened with read-only permissions, so this never
  6109. // fails.
  6110. sqlite3_blob_close(blobVectors);
  6111. return rc;
  6112. }
  6113. int vec0Filter_knn(vec0_cursor *pCur, vec0_vtab *p, int idxNum,
  6114. const char *idxStr, int argc, sqlite3_value **argv) {
  6115. assert(argc == (strlen(idxStr)-1) / 4);
  6116. int rc;
  6117. struct vec0_query_knn_data *knn_data;
  6118. int vectorColumnIdx = idxNum;
  6119. struct VectorColumnDefinition *vector_column =
  6120. &p->vector_columns[vectorColumnIdx];
  6121. struct Array *arrayRowidsIn = NULL;
  6122. sqlite3_stmt *stmtChunks = NULL;
  6123. void *queryVector;
  6124. size_t dimensions;
  6125. enum VectorElementType elementType;
  6126. vector_cleanup queryVectorCleanup = vector_cleanup_noop;
  6127. char *pzError;
  6128. knn_data = sqlite3_malloc(sizeof(*knn_data));
  6129. if (!knn_data) {
  6130. return SQLITE_NOMEM;
  6131. }
  6132. memset(knn_data, 0, sizeof(*knn_data));
  6133. // array of `struct Vec0MetadataIn`, IF there are any `xxx in (...)` metadata constraints
  6134. struct Array * aMetadataIn = NULL;
  6135. int query_idx =-1;
  6136. int k_idx = -1;
  6137. int rowid_in_idx = -1;
  6138. for(int i = 0; i < argc; i++) {
  6139. if(idxStr[1 + (i*4)] == VEC0_IDXSTR_KIND_KNN_MATCH) {
  6140. query_idx = i;
  6141. }
  6142. if(idxStr[1 + (i*4)] == VEC0_IDXSTR_KIND_KNN_K) {
  6143. k_idx = i;
  6144. }
  6145. if(idxStr[1 + (i*4)] == VEC0_IDXSTR_KIND_KNN_ROWID_IN) {
  6146. rowid_in_idx = i;
  6147. }
  6148. }
  6149. assert(query_idx >= 0);
  6150. assert(k_idx >= 0);
  6151. // make sure the query vector matches the vector column (type dimensions etc.)
  6152. rc = vector_from_value(argv[query_idx], &queryVector, &dimensions, &elementType,
  6153. &queryVectorCleanup, &pzError);
  6154. if (rc != SQLITE_OK) {
  6155. vtab_set_error(&p->base,
  6156. "Query vector on the \"%.*s\" column is invalid: %z",
  6157. vector_column->name_length, vector_column->name, pzError);
  6158. rc = SQLITE_ERROR;
  6159. goto cleanup;
  6160. }
  6161. if (elementType != vector_column->element_type) {
  6162. vtab_set_error(
  6163. &p->base,
  6164. "Query vector for the \"%.*s\" column is expected to be of type "
  6165. "%s, but a %s vector was provided.",
  6166. vector_column->name_length, vector_column->name,
  6167. vector_subtype_name(vector_column->element_type),
  6168. vector_subtype_name(elementType));
  6169. rc = SQLITE_ERROR;
  6170. goto cleanup;
  6171. }
  6172. if (dimensions != vector_column->dimensions) {
  6173. vtab_set_error(
  6174. &p->base,
  6175. "Dimension mismatch for query vector for the \"%.*s\" column. "
  6176. "Expected %d dimensions but received %d.",
  6177. vector_column->name_length, vector_column->name,
  6178. vector_column->dimensions, dimensions);
  6179. rc = SQLITE_ERROR;
  6180. goto cleanup;
  6181. }
  6182. i64 k = sqlite3_value_int64(argv[k_idx]);
  6183. if (k < 0) {
  6184. vtab_set_error(
  6185. &p->base, "k value in knn queries must be greater than or equal to 0.");
  6186. rc = SQLITE_ERROR;
  6187. goto cleanup;
  6188. }
  6189. #define SQLITE_VEC_VEC0_K_MAX 4096
  6190. if (k > SQLITE_VEC_VEC0_K_MAX) {
  6191. vtab_set_error(
  6192. &p->base,
  6193. "k value in knn query too large, provided %lld and the limit is %lld",
  6194. k, SQLITE_VEC_VEC0_K_MAX);
  6195. rc = SQLITE_ERROR;
  6196. goto cleanup;
  6197. }
  6198. if (k == 0) {
  6199. knn_data->k = 0;
  6200. pCur->knn_data = knn_data;
  6201. pCur->query_plan = VEC0_QUERY_PLAN_KNN;
  6202. rc = SQLITE_OK;
  6203. goto cleanup;
  6204. }
  6205. // handle when a `rowid in (...)` operation was provided
  6206. // Array of all the rowids that appear in any `rowid in (...)` constraint.
  6207. // NULL if none were provided, which means a "full" scan.
  6208. #if COMPILER_SUPPORTS_VTAB_IN
  6209. if (rowid_in_idx >= 0) {
  6210. sqlite3_value *item;
  6211. int rc;
  6212. arrayRowidsIn = sqlite3_malloc(sizeof(*arrayRowidsIn));
  6213. if (!arrayRowidsIn) {
  6214. rc = SQLITE_NOMEM;
  6215. goto cleanup;
  6216. }
  6217. memset(arrayRowidsIn, 0, sizeof(*arrayRowidsIn));
  6218. rc = array_init(arrayRowidsIn, sizeof(i64), 32);
  6219. if (rc != SQLITE_OK) {
  6220. goto cleanup;
  6221. }
  6222. for (rc = sqlite3_vtab_in_first(argv[rowid_in_idx], &item); rc == SQLITE_OK && item;
  6223. rc = sqlite3_vtab_in_next(argv[rowid_in_idx], &item)) {
  6224. i64 rowid;
  6225. if (p->pkIsText) {
  6226. rc = vec0_rowid_from_id(p, item, &rowid);
  6227. if (rc != SQLITE_OK) {
  6228. goto cleanup;
  6229. }
  6230. } else {
  6231. rowid = sqlite3_value_int64(item);
  6232. }
  6233. rc = array_append(arrayRowidsIn, &rowid);
  6234. if (rc != SQLITE_OK) {
  6235. goto cleanup;
  6236. }
  6237. }
  6238. if (rc != SQLITE_DONE) {
  6239. vtab_set_error(&p->base, "error processing rowid in (...) array");
  6240. goto cleanup;
  6241. }
  6242. qsort(arrayRowidsIn->z, arrayRowidsIn->length, arrayRowidsIn->element_size,
  6243. _cmp);
  6244. }
  6245. #endif
  6246. #if COMPILER_SUPPORTS_VTAB_IN
  6247. for(int i = 0; i < argc; i++) {
  6248. if(!(idxStr[1 + (i*4)] == VEC0_IDXSTR_KIND_METADATA_CONSTRAINT && idxStr[1 + (i*4) + 2] == VEC0_METADATA_OPERATOR_IN)) {
  6249. continue;
  6250. }
  6251. int metadata_idx = idxStr[1 + (i*4) + 1] - 'A';
  6252. if(!aMetadataIn) {
  6253. aMetadataIn = sqlite3_malloc(sizeof(*aMetadataIn));
  6254. if(!aMetadataIn) {
  6255. rc = SQLITE_NOMEM;
  6256. goto cleanup;
  6257. }
  6258. memset(aMetadataIn, 0, sizeof(*aMetadataIn));
  6259. rc = array_init(aMetadataIn, sizeof(struct Vec0MetadataIn), 8);
  6260. if(rc != SQLITE_OK) {
  6261. goto cleanup;
  6262. }
  6263. }
  6264. struct Vec0MetadataIn item;
  6265. memset(&item, 0, sizeof(item));
  6266. item.metadata_idx=metadata_idx;
  6267. item.argv_idx = i;
  6268. switch(p->metadata_columns[metadata_idx].kind) {
  6269. case VEC0_METADATA_COLUMN_KIND_INTEGER: {
  6270. rc = array_init(&item.array, sizeof(i64), 16);
  6271. if(rc != SQLITE_OK) {
  6272. goto cleanup;
  6273. }
  6274. sqlite3_value *entry;
  6275. for (rc = sqlite3_vtab_in_first(argv[i], &entry); rc == SQLITE_OK && entry; rc = sqlite3_vtab_in_next(argv[i], &entry)) {
  6276. i64 v = sqlite3_value_int64(entry);
  6277. rc = array_append(&item.array, &v);
  6278. if (rc != SQLITE_OK) {
  6279. goto cleanup;
  6280. }
  6281. }
  6282. if (rc != SQLITE_DONE) {
  6283. vtab_set_error(&p->base, "Error fetching next value in `x in (...)` integer expression");
  6284. goto cleanup;
  6285. }
  6286. break;
  6287. }
  6288. case VEC0_METADATA_COLUMN_KIND_TEXT: {
  6289. rc = array_init(&item.array, sizeof(struct Vec0MetadataInTextEntry), 16);
  6290. if(rc != SQLITE_OK) {
  6291. goto cleanup;
  6292. }
  6293. sqlite3_value *entry;
  6294. for (rc = sqlite3_vtab_in_first(argv[i], &entry); rc == SQLITE_OK && entry; rc = sqlite3_vtab_in_next(argv[i], &entry)) {
  6295. const char * s = (const char *) sqlite3_value_text(entry);
  6296. int n = sqlite3_value_bytes(entry);
  6297. struct Vec0MetadataInTextEntry entry;
  6298. entry.zString = sqlite3_mprintf("%.*s", n, s);
  6299. if(!entry.zString) {
  6300. rc = SQLITE_NOMEM;
  6301. goto cleanup;
  6302. }
  6303. entry.n = n;
  6304. rc = array_append(&item.array, &entry);
  6305. if (rc != SQLITE_OK) {
  6306. goto cleanup;
  6307. }
  6308. }
  6309. if (rc != SQLITE_DONE) {
  6310. vtab_set_error(&p->base, "Error fetching next value in `x in (...)` text expression");
  6311. goto cleanup;
  6312. }
  6313. break;
  6314. }
  6315. default: {
  6316. vtab_set_error(&p->base, "Internal sqlite-vec error");
  6317. goto cleanup;
  6318. }
  6319. }
  6320. rc = array_append(aMetadataIn, &item);
  6321. if(rc != SQLITE_OK) {
  6322. goto cleanup;
  6323. }
  6324. }
  6325. #endif
  6326. rc = vec0_chunks_iter(p, idxStr, argc, argv, &stmtChunks);
  6327. if (rc != SQLITE_OK) {
  6328. // IMP: V06942_23781
  6329. vtab_set_error(&p->base, "Error preparing stmtChunk: %s",
  6330. sqlite3_errmsg(p->db));
  6331. goto cleanup;
  6332. }
  6333. i64 *topk_rowids = NULL;
  6334. f32 *topk_distances = NULL;
  6335. i64 k_used = 0;
  6336. rc = vec0Filter_knn_chunks_iter(p, stmtChunks, vector_column, vectorColumnIdx,
  6337. arrayRowidsIn, aMetadataIn, idxStr, argc, argv, queryVector, k, &topk_rowids,
  6338. &topk_distances, &k_used);
  6339. if (rc != SQLITE_OK) {
  6340. goto cleanup;
  6341. }
  6342. knn_data->current_idx = 0;
  6343. knn_data->k = k;
  6344. knn_data->rowids = topk_rowids;
  6345. knn_data->distances = topk_distances;
  6346. knn_data->k_used = k_used;
  6347. pCur->knn_data = knn_data;
  6348. pCur->query_plan = VEC0_QUERY_PLAN_KNN;
  6349. rc = SQLITE_OK;
  6350. cleanup:
  6351. sqlite3_finalize(stmtChunks);
  6352. array_cleanup(arrayRowidsIn);
  6353. sqlite3_free(arrayRowidsIn);
  6354. queryVectorCleanup(queryVector);
  6355. if(aMetadataIn) {
  6356. for(size_t i = 0; i < aMetadataIn->length; i++) {
  6357. struct Vec0MetadataIn* item = &((struct Vec0MetadataIn *) aMetadataIn->z)[i];
  6358. for(size_t j = 0; j < item->array.length; j++) {
  6359. if(p->metadata_columns[item->metadata_idx].kind == VEC0_METADATA_COLUMN_KIND_TEXT) {
  6360. struct Vec0MetadataInTextEntry entry = ((struct Vec0MetadataInTextEntry*)item->array.z)[j];
  6361. sqlite3_free(entry.zString);
  6362. }
  6363. }
  6364. array_cleanup(&item->array);
  6365. }
  6366. array_cleanup(aMetadataIn);
  6367. }
  6368. sqlite3_free(aMetadataIn);
  6369. return rc;
  6370. }
  6371. int vec0Filter_fullscan(vec0_vtab *p, vec0_cursor *pCur) {
  6372. int rc;
  6373. char *zSql;
  6374. struct vec0_query_fullscan_data *fullscan_data;
  6375. fullscan_data = sqlite3_malloc(sizeof(*fullscan_data));
  6376. if (!fullscan_data) {
  6377. return SQLITE_NOMEM;
  6378. }
  6379. memset(fullscan_data, 0, sizeof(*fullscan_data));
  6380. zSql = sqlite3_mprintf(" SELECT rowid "
  6381. " FROM " VEC0_SHADOW_ROWIDS_NAME
  6382. " ORDER by chunk_id, chunk_offset ",
  6383. p->schemaName, p->tableName);
  6384. if (!zSql) {
  6385. rc = SQLITE_NOMEM;
  6386. goto error;
  6387. }
  6388. rc = sqlite3_prepare_v2(p->db, zSql, -1, &fullscan_data->rowids_stmt, NULL);
  6389. sqlite3_free(zSql);
  6390. if (rc != SQLITE_OK) {
  6391. // IMP: V09901_26739
  6392. vtab_set_error(&p->base, "Error preparing rowid scan: %s",
  6393. sqlite3_errmsg(p->db));
  6394. goto error;
  6395. }
  6396. rc = sqlite3_step(fullscan_data->rowids_stmt);
  6397. // DONE when there's no rowids, ROW when there are, both "success"
  6398. if (!(rc == SQLITE_ROW || rc == SQLITE_DONE)) {
  6399. goto error;
  6400. }
  6401. fullscan_data->done = rc == SQLITE_DONE;
  6402. pCur->query_plan = VEC0_QUERY_PLAN_FULLSCAN;
  6403. pCur->fullscan_data = fullscan_data;
  6404. return SQLITE_OK;
  6405. error:
  6406. vec0_query_fullscan_data_clear(fullscan_data);
  6407. sqlite3_free(fullscan_data);
  6408. return rc;
  6409. }
  6410. int vec0Filter_point(vec0_cursor *pCur, vec0_vtab *p, int argc,
  6411. sqlite3_value **argv) {
  6412. int rc;
  6413. assert(argc == 1);
  6414. i64 rowid;
  6415. struct vec0_query_point_data *point_data = NULL;
  6416. point_data = sqlite3_malloc(sizeof(*point_data));
  6417. if (!point_data) {
  6418. rc = SQLITE_NOMEM;
  6419. goto error;
  6420. }
  6421. memset(point_data, 0, sizeof(*point_data));
  6422. if (p->pkIsText) {
  6423. rc = vec0_rowid_from_id(p, argv[0], &rowid);
  6424. if (rc == SQLITE_EMPTY) {
  6425. goto eof;
  6426. }
  6427. if (rc != SQLITE_OK) {
  6428. goto error;
  6429. }
  6430. } else {
  6431. rowid = sqlite3_value_int64(argv[0]);
  6432. }
  6433. for (int i = 0; i < p->numVectorColumns; i++) {
  6434. rc = vec0_get_vector_data(p, rowid, i, &point_data->vectors[i], NULL);
  6435. if (rc == SQLITE_EMPTY) {
  6436. goto eof;
  6437. }
  6438. if (rc != SQLITE_OK) {
  6439. goto error;
  6440. }
  6441. }
  6442. point_data->rowid = rowid;
  6443. point_data->done = 0;
  6444. pCur->point_data = point_data;
  6445. pCur->query_plan = VEC0_QUERY_PLAN_POINT;
  6446. return SQLITE_OK;
  6447. eof:
  6448. point_data->rowid = rowid;
  6449. point_data->done = 1;
  6450. pCur->point_data = point_data;
  6451. pCur->query_plan = VEC0_QUERY_PLAN_POINT;
  6452. return SQLITE_OK;
  6453. error:
  6454. vec0_query_point_data_clear(point_data);
  6455. sqlite3_free(point_data);
  6456. return rc;
  6457. }
  6458. static int vec0Filter(sqlite3_vtab_cursor *pVtabCursor, int idxNum,
  6459. const char *idxStr, int argc, sqlite3_value **argv) {
  6460. vec0_vtab *p = (vec0_vtab *)pVtabCursor->pVtab;
  6461. vec0_cursor *pCur = (vec0_cursor *)pVtabCursor;
  6462. vec0_cursor_clear(pCur);
  6463. int idxStrLength = strlen(idxStr);
  6464. if(idxStrLength <= 0) {
  6465. return SQLITE_ERROR;
  6466. }
  6467. if((idxStrLength-1) % 4 != 0) {
  6468. return SQLITE_ERROR;
  6469. }
  6470. int numValueEntries = (idxStrLength-1) / 4;
  6471. if(numValueEntries != argc) {
  6472. return SQLITE_ERROR;
  6473. }
  6474. char query_plan = idxStr[0];
  6475. switch(query_plan) {
  6476. case VEC0_QUERY_PLAN_FULLSCAN:
  6477. return vec0Filter_fullscan(p, pCur);
  6478. case VEC0_QUERY_PLAN_KNN:
  6479. return vec0Filter_knn(pCur, p, idxNum, idxStr, argc, argv);
  6480. case VEC0_QUERY_PLAN_POINT:
  6481. return vec0Filter_point(pCur, p, argc, argv);
  6482. default:
  6483. vtab_set_error(pVtabCursor->pVtab, "unknown idxStr '%s'", idxStr);
  6484. return SQLITE_ERROR;
  6485. }
  6486. }
  6487. static int vec0Rowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid) {
  6488. vec0_cursor *pCur = (vec0_cursor *)cur;
  6489. switch (pCur->query_plan) {
  6490. case VEC0_QUERY_PLAN_FULLSCAN: {
  6491. *pRowid = sqlite3_column_int64(pCur->fullscan_data->rowids_stmt, 0);
  6492. return SQLITE_OK;
  6493. }
  6494. case VEC0_QUERY_PLAN_POINT: {
  6495. *pRowid = pCur->point_data->rowid;
  6496. return SQLITE_OK;
  6497. }
  6498. case VEC0_QUERY_PLAN_KNN: {
  6499. vtab_set_error(cur->pVtab,
  6500. "Internal sqlite-vec error: expected point query plan in "
  6501. "vec0Rowid, found %d",
  6502. pCur->query_plan);
  6503. return SQLITE_ERROR;
  6504. }
  6505. }
  6506. return SQLITE_ERROR;
  6507. }
  6508. static int vec0Next(sqlite3_vtab_cursor *cur) {
  6509. vec0_cursor *pCur = (vec0_cursor *)cur;
  6510. switch (pCur->query_plan) {
  6511. case VEC0_QUERY_PLAN_FULLSCAN: {
  6512. if (!pCur->fullscan_data) {
  6513. return SQLITE_ERROR;
  6514. }
  6515. int rc = sqlite3_step(pCur->fullscan_data->rowids_stmt);
  6516. if (rc == SQLITE_DONE) {
  6517. pCur->fullscan_data->done = 1;
  6518. return SQLITE_OK;
  6519. }
  6520. if (rc == SQLITE_ROW) {
  6521. return SQLITE_OK;
  6522. }
  6523. return SQLITE_ERROR;
  6524. }
  6525. case VEC0_QUERY_PLAN_KNN: {
  6526. if (!pCur->knn_data) {
  6527. return SQLITE_ERROR;
  6528. }
  6529. pCur->knn_data->current_idx++;
  6530. return SQLITE_OK;
  6531. }
  6532. case VEC0_QUERY_PLAN_POINT: {
  6533. if (!pCur->point_data) {
  6534. return SQLITE_ERROR;
  6535. }
  6536. pCur->point_data->done = 1;
  6537. return SQLITE_OK;
  6538. }
  6539. }
  6540. return SQLITE_ERROR;
  6541. }
  6542. static int vec0Eof(sqlite3_vtab_cursor *cur) {
  6543. vec0_cursor *pCur = (vec0_cursor *)cur;
  6544. switch (pCur->query_plan) {
  6545. case VEC0_QUERY_PLAN_FULLSCAN: {
  6546. if (!pCur->fullscan_data) {
  6547. return 1;
  6548. }
  6549. return pCur->fullscan_data->done;
  6550. }
  6551. case VEC0_QUERY_PLAN_KNN: {
  6552. if (!pCur->knn_data) {
  6553. return 1;
  6554. }
  6555. // return (pCur->knn_data->current_idx >= pCur->knn_data->k) ||
  6556. // (pCur->knn_data->distances[pCur->knn_data->current_idx] == FLT_MAX);
  6557. return (pCur->knn_data->current_idx >= pCur->knn_data->k_used);
  6558. }
  6559. case VEC0_QUERY_PLAN_POINT: {
  6560. if (!pCur->point_data) {
  6561. return 1;
  6562. }
  6563. return pCur->point_data->done;
  6564. }
  6565. }
  6566. return 1;
  6567. }
  6568. static int vec0Column_fullscan(vec0_vtab *pVtab, vec0_cursor *pCur,
  6569. sqlite3_context *context, int i) {
  6570. if (!pCur->fullscan_data) {
  6571. sqlite3_result_error(
  6572. context, "Internal sqlite-vec error: fullscan_data is NULL.", -1);
  6573. return SQLITE_ERROR;
  6574. }
  6575. i64 rowid = sqlite3_column_int64(pCur->fullscan_data->rowids_stmt, 0);
  6576. if (i == VEC0_COLUMN_ID) {
  6577. return vec0_result_id(pVtab, context, rowid);
  6578. }
  6579. else if (vec0_column_idx_is_vector(pVtab, i)) {
  6580. void *v;
  6581. int sz;
  6582. int vector_idx = vec0_column_idx_to_vector_idx(pVtab, i);
  6583. int rc = vec0_get_vector_data(pVtab, rowid, vector_idx, &v, &sz);
  6584. if (rc != SQLITE_OK) {
  6585. return rc;
  6586. }
  6587. sqlite3_result_blob(context, v, sz, sqlite3_free);
  6588. sqlite3_result_subtype(context,
  6589. pVtab->vector_columns[vector_idx].element_type);
  6590. }
  6591. else if (i == vec0_column_distance_idx(pVtab)) {
  6592. sqlite3_result_null(context);
  6593. }
  6594. else if(vec0_column_idx_is_partition(pVtab, i)) {
  6595. int partition_idx = vec0_column_idx_to_partition_idx(pVtab, i);
  6596. sqlite3_value * v;
  6597. int rc = vec0_get_partition_value_for_rowid(pVtab, rowid, partition_idx, &v);
  6598. if(rc == SQLITE_OK) {
  6599. sqlite3_result_value(context, v);
  6600. sqlite3_value_free(v);
  6601. }else {
  6602. sqlite3_result_error_code(context, rc);
  6603. }
  6604. }
  6605. else if(vec0_column_idx_is_auxiliary(pVtab, i)) {
  6606. int auxiliary_idx = vec0_column_idx_to_auxiliary_idx(pVtab, i);
  6607. sqlite3_value * v;
  6608. int rc = vec0_get_auxiliary_value_for_rowid(pVtab, rowid, auxiliary_idx, &v);
  6609. if(rc == SQLITE_OK) {
  6610. sqlite3_result_value(context, v);
  6611. sqlite3_value_free(v);
  6612. }else {
  6613. sqlite3_result_error_code(context, rc);
  6614. }
  6615. }
  6616. else if(vec0_column_idx_is_metadata(pVtab, i)) {
  6617. if(sqlite3_vtab_nochange(context)) {
  6618. return SQLITE_OK;
  6619. }
  6620. int metadata_idx = vec0_column_idx_to_metadata_idx(pVtab, i);
  6621. int rc = vec0_result_metadata_value_for_rowid(pVtab, rowid, metadata_idx, context);
  6622. if(rc != SQLITE_OK) {
  6623. // IMP: V15466_32305
  6624. const char * zErr = sqlite3_mprintf(
  6625. "Could not extract metadata value for column %.*s at rowid %lld",
  6626. pVtab->metadata_columns[metadata_idx].name_length,
  6627. pVtab->metadata_columns[metadata_idx].name, rowid
  6628. );
  6629. if(zErr) {
  6630. sqlite3_result_error(context, zErr, -1);
  6631. sqlite3_free((void *) zErr);
  6632. }else {
  6633. sqlite3_result_error_nomem(context);
  6634. }
  6635. }
  6636. }
  6637. return SQLITE_OK;
  6638. }
  6639. static int vec0Column_point(vec0_vtab *pVtab, vec0_cursor *pCur,
  6640. sqlite3_context *context, int i) {
  6641. if (!pCur->point_data) {
  6642. sqlite3_result_error(context,
  6643. "Internal sqlite-vec error: point_data is NULL.", -1);
  6644. return SQLITE_ERROR;
  6645. }
  6646. if (i == VEC0_COLUMN_ID) {
  6647. return vec0_result_id(pVtab, context, pCur->point_data->rowid);
  6648. }
  6649. else if (i == vec0_column_distance_idx(pVtab)) {
  6650. sqlite3_result_null(context);
  6651. return SQLITE_OK;
  6652. }
  6653. else if (vec0_column_idx_is_vector(pVtab, i)) {
  6654. if (sqlite3_vtab_nochange(context)) {
  6655. sqlite3_result_null(context);
  6656. return SQLITE_OK;
  6657. }
  6658. int vector_idx = vec0_column_idx_to_vector_idx(pVtab, i);
  6659. sqlite3_result_blob(
  6660. context, pCur->point_data->vectors[vector_idx],
  6661. vector_column_byte_size(pVtab->vector_columns[vector_idx]),
  6662. SQLITE_TRANSIENT);
  6663. sqlite3_result_subtype(context,
  6664. pVtab->vector_columns[vector_idx].element_type);
  6665. return SQLITE_OK;
  6666. }
  6667. else if(vec0_column_idx_is_partition(pVtab, i)) {
  6668. if(sqlite3_vtab_nochange(context)) {
  6669. return SQLITE_OK;
  6670. }
  6671. int partition_idx = vec0_column_idx_to_partition_idx(pVtab, i);
  6672. i64 rowid = pCur->point_data->rowid;
  6673. sqlite3_value * v;
  6674. int rc = vec0_get_partition_value_for_rowid(pVtab, rowid, partition_idx, &v);
  6675. if(rc == SQLITE_OK) {
  6676. sqlite3_result_value(context, v);
  6677. sqlite3_value_free(v);
  6678. }else {
  6679. sqlite3_result_error_code(context, rc);
  6680. }
  6681. }
  6682. else if(vec0_column_idx_is_auxiliary(pVtab, i)) {
  6683. if(sqlite3_vtab_nochange(context)) {
  6684. return SQLITE_OK;
  6685. }
  6686. i64 rowid = pCur->point_data->rowid;
  6687. int auxiliary_idx = vec0_column_idx_to_auxiliary_idx(pVtab, i);
  6688. sqlite3_value * v;
  6689. int rc = vec0_get_auxiliary_value_for_rowid(pVtab, rowid, auxiliary_idx, &v);
  6690. if(rc == SQLITE_OK) {
  6691. sqlite3_result_value(context, v);
  6692. sqlite3_value_free(v);
  6693. }else {
  6694. sqlite3_result_error_code(context, rc);
  6695. }
  6696. }
  6697. else if(vec0_column_idx_is_metadata(pVtab, i)) {
  6698. if(sqlite3_vtab_nochange(context)) {
  6699. return SQLITE_OK;
  6700. }
  6701. i64 rowid = pCur->point_data->rowid;
  6702. int metadata_idx = vec0_column_idx_to_metadata_idx(pVtab, i);
  6703. int rc = vec0_result_metadata_value_for_rowid(pVtab, rowid, metadata_idx, context);
  6704. if(rc != SQLITE_OK) {
  6705. const char * zErr = sqlite3_mprintf(
  6706. "Could not extract metadata value for column %.*s at rowid %lld",
  6707. pVtab->metadata_columns[metadata_idx].name_length,
  6708. pVtab->metadata_columns[metadata_idx].name, rowid
  6709. );
  6710. if(zErr) {
  6711. sqlite3_result_error(context, zErr, -1);
  6712. sqlite3_free((void *) zErr);
  6713. }else {
  6714. sqlite3_result_error_nomem(context);
  6715. }
  6716. }
  6717. }
  6718. return SQLITE_OK;
  6719. }
  6720. static int vec0Column_knn(vec0_vtab *pVtab, vec0_cursor *pCur,
  6721. sqlite3_context *context, int i) {
  6722. if (!pCur->knn_data) {
  6723. sqlite3_result_error(context,
  6724. "Internal sqlite-vec error: knn_data is NULL.", -1);
  6725. return SQLITE_ERROR;
  6726. }
  6727. if (i == VEC0_COLUMN_ID) {
  6728. i64 rowid = pCur->knn_data->rowids[pCur->knn_data->current_idx];
  6729. return vec0_result_id(pVtab, context, rowid);
  6730. }
  6731. else if (i == vec0_column_distance_idx(pVtab)) {
  6732. sqlite3_result_double(
  6733. context, pCur->knn_data->distances[pCur->knn_data->current_idx]);
  6734. return SQLITE_OK;
  6735. }
  6736. else if (vec0_column_idx_is_vector(pVtab, i)) {
  6737. void *out;
  6738. int sz;
  6739. int vector_idx = vec0_column_idx_to_vector_idx(pVtab, i);
  6740. int rc = vec0_get_vector_data(
  6741. pVtab, pCur->knn_data->rowids[pCur->knn_data->current_idx], vector_idx,
  6742. &out, &sz);
  6743. if (rc != SQLITE_OK) {
  6744. return rc;
  6745. }
  6746. sqlite3_result_blob(context, out, sz, sqlite3_free);
  6747. sqlite3_result_subtype(context,
  6748. pVtab->vector_columns[vector_idx].element_type);
  6749. return SQLITE_OK;
  6750. }
  6751. else if(vec0_column_idx_is_partition(pVtab, i)) {
  6752. int partition_idx = vec0_column_idx_to_partition_idx(pVtab, i);
  6753. i64 rowid = pCur->knn_data->rowids[pCur->knn_data->current_idx];
  6754. sqlite3_value * v;
  6755. int rc = vec0_get_partition_value_for_rowid(pVtab, rowid, partition_idx, &v);
  6756. if(rc == SQLITE_OK) {
  6757. sqlite3_result_value(context, v);
  6758. sqlite3_value_free(v);
  6759. }else {
  6760. sqlite3_result_error_code(context, rc);
  6761. }
  6762. }
  6763. else if(vec0_column_idx_is_auxiliary(pVtab, i)) {
  6764. int auxiliary_idx = vec0_column_idx_to_auxiliary_idx(pVtab, i);
  6765. i64 rowid = pCur->knn_data->rowids[pCur->knn_data->current_idx];
  6766. sqlite3_value * v;
  6767. int rc = vec0_get_auxiliary_value_for_rowid(pVtab, rowid, auxiliary_idx, &v);
  6768. if(rc == SQLITE_OK) {
  6769. sqlite3_result_value(context, v);
  6770. sqlite3_value_free(v);
  6771. }else {
  6772. sqlite3_result_error_code(context, rc);
  6773. }
  6774. }
  6775. else if(vec0_column_idx_is_metadata(pVtab, i)) {
  6776. int metadata_idx = vec0_column_idx_to_metadata_idx(pVtab, i);
  6777. i64 rowid = pCur->knn_data->rowids[pCur->knn_data->current_idx];
  6778. int rc = vec0_result_metadata_value_for_rowid(pVtab, rowid, metadata_idx, context);
  6779. if(rc != SQLITE_OK) {
  6780. const char * zErr = sqlite3_mprintf(
  6781. "Could not extract metadata value for column %.*s at rowid %lld",
  6782. pVtab->metadata_columns[metadata_idx].name_length,
  6783. pVtab->metadata_columns[metadata_idx].name, rowid
  6784. );
  6785. if(zErr) {
  6786. sqlite3_result_error(context, zErr, -1);
  6787. sqlite3_free((void *) zErr);
  6788. }else {
  6789. sqlite3_result_error_nomem(context);
  6790. }
  6791. }
  6792. }
  6793. return SQLITE_OK;
  6794. }
  6795. static int vec0Column(sqlite3_vtab_cursor *cur, sqlite3_context *context,
  6796. int i) {
  6797. vec0_cursor *pCur = (vec0_cursor *)cur;
  6798. vec0_vtab *pVtab = (vec0_vtab *)cur->pVtab;
  6799. switch (pCur->query_plan) {
  6800. case VEC0_QUERY_PLAN_FULLSCAN: {
  6801. return vec0Column_fullscan(pVtab, pCur, context, i);
  6802. }
  6803. case VEC0_QUERY_PLAN_KNN: {
  6804. return vec0Column_knn(pVtab, pCur, context, i);
  6805. }
  6806. case VEC0_QUERY_PLAN_POINT: {
  6807. return vec0Column_point(pVtab, pCur, context, i);
  6808. }
  6809. }
  6810. return SQLITE_OK;
  6811. }
  6812. /**
  6813. * @brief Handles the "insert rowid" step of a row insert operation of a vec0
  6814. * table.
  6815. *
  6816. * This function will insert a new row into the _rowids vec0 shadow table.
  6817. *
  6818. * @param p: virtual table
  6819. * @param idValue: Value containing the inserted rowid/id value.
  6820. * @param rowid: Output rowid, will point to the "real" i64 rowid
  6821. * value that was inserted
  6822. * @return int SQLITE_OK on success, error code on failure
  6823. */
  6824. int vec0Update_InsertRowidStep(vec0_vtab *p, sqlite3_value *idValue,
  6825. i64 *rowid) {
  6826. /**
  6827. * An insert into a vec0 table can happen a few different ways:
  6828. * 1) With default INTEGER primary key: With a supplied i64 rowid
  6829. * 2) With default INTEGER primary key: WITHOUT a supplied rowid
  6830. * 3) With TEXT primary key: supplied text rowid
  6831. */
  6832. int rc;
  6833. // Option 3: vtab has a user-defined TEXT primary key, so ensure a text value
  6834. // is provided.
  6835. if (p->pkIsText) {
  6836. if (sqlite3_value_type(idValue) != SQLITE_TEXT) {
  6837. // IMP: V04200_21039
  6838. vtab_set_error(&p->base,
  6839. "The %s virtual table was declared with a TEXT primary "
  6840. "key, but a non-TEXT value was provided in an INSERT.",
  6841. p->tableName);
  6842. return SQLITE_ERROR;
  6843. }
  6844. return vec0_rowids_insert_id(p, idValue, rowid);
  6845. }
  6846. // Option 1: User supplied a i64 rowid
  6847. if (sqlite3_value_type(idValue) == SQLITE_INTEGER) {
  6848. i64 suppliedRowid = sqlite3_value_int64(idValue);
  6849. rc = vec0_rowids_insert_rowid(p, suppliedRowid);
  6850. if (rc == SQLITE_OK) {
  6851. *rowid = suppliedRowid;
  6852. }
  6853. return rc;
  6854. }
  6855. // Option 2: User did not suppled a rowid
  6856. if (sqlite3_value_type(idValue) != SQLITE_NULL) {
  6857. // IMP: V30855_14925
  6858. vtab_set_error(&p->base,
  6859. "Only integers are allows for primary key values on %s",
  6860. p->tableName);
  6861. return SQLITE_ERROR;
  6862. }
  6863. // NULL to get next auto-incremented value
  6864. return vec0_rowids_insert_id(p, NULL, rowid);
  6865. }
  6866. /**
  6867. * @brief Determines the "next available" chunk position for a newly inserted
  6868. * vec0 row.
  6869. *
  6870. * This operation may insert a new "blank" chunk the _chunks table, if there is
  6871. * no more space in previous chunks.
  6872. *
  6873. * @param p: virtual table
  6874. * @param partitionKeyValues: array of partition key column values, to constrain
  6875. * against any partition key columns.
  6876. * @param chunk_rowid: Output rowid of the chunk in the _chunks virtual table
  6877. * that has the avialabiity.
  6878. * @param chunk_offset: Output the index of the available space insert the
  6879. * chunk, based on the index of the first available validity bit.
  6880. * @param pBlobValidity: Output blob of the validity column of the available
  6881. * chunk. Will be opened with read/write permissions.
  6882. * @param pValidity: Output buffer of the original chunk's validity column.
  6883. * Needs to be cleaned up with sqlite3_free().
  6884. * @return int SQLITE_OK on success, error code on failure
  6885. */
  6886. int vec0Update_InsertNextAvailableStep(
  6887. vec0_vtab *p,
  6888. sqlite3_value ** partitionKeyValues,
  6889. i64 *chunk_rowid, i64 *chunk_offset,
  6890. sqlite3_blob **blobChunksValidity,
  6891. const unsigned char **bufferChunksValidity) {
  6892. int rc;
  6893. i64 validitySize;
  6894. *chunk_offset = -1;
  6895. rc = vec0_get_latest_chunk_rowid(p, chunk_rowid, partitionKeyValues);
  6896. if(rc == SQLITE_EMPTY) {
  6897. goto done;
  6898. }
  6899. if (rc != SQLITE_OK) {
  6900. goto cleanup;
  6901. }
  6902. rc = sqlite3_blob_open(p->db, p->schemaName, p->shadowChunksName, "validity",
  6903. *chunk_rowid, 1, blobChunksValidity);
  6904. if (rc != SQLITE_OK) {
  6905. // IMP: V22053_06123
  6906. vtab_set_error(&p->base,
  6907. VEC_INTERAL_ERROR
  6908. "could not open validity blob on %s.%s.%lld",
  6909. p->schemaName, p->shadowChunksName, *chunk_rowid);
  6910. goto cleanup;
  6911. }
  6912. validitySize = sqlite3_blob_bytes(*blobChunksValidity);
  6913. if (validitySize != p->chunk_size / CHAR_BIT) {
  6914. // IMP: V29362_13432
  6915. vtab_set_error(&p->base,
  6916. VEC_INTERAL_ERROR
  6917. "validity blob size mismatch on "
  6918. "%s.%s.%lld, expected %lld but received %lld.",
  6919. p->schemaName, p->shadowChunksName, *chunk_rowid,
  6920. (i64)(p->chunk_size / CHAR_BIT), validitySize);
  6921. rc = SQLITE_ERROR;
  6922. goto cleanup;
  6923. }
  6924. *bufferChunksValidity = sqlite3_malloc(validitySize);
  6925. if (!(*bufferChunksValidity)) {
  6926. vtab_set_error(&p->base, VEC_INTERAL_ERROR
  6927. "Could not allocate memory for validity bitmap");
  6928. rc = SQLITE_NOMEM;
  6929. goto cleanup;
  6930. }
  6931. rc = sqlite3_blob_read(*blobChunksValidity, (void *)*bufferChunksValidity,
  6932. validitySize, 0);
  6933. if (rc != SQLITE_OK) {
  6934. vtab_set_error(&p->base,
  6935. VEC_INTERAL_ERROR
  6936. "Could not read validity bitmap for %s.%s.%lld",
  6937. p->schemaName, p->shadowChunksName, *chunk_rowid);
  6938. goto cleanup;
  6939. }
  6940. // find the next available offset, ie first `0` in the bitmap.
  6941. for (int i = 0; i < validitySize; i++) {
  6942. if ((*bufferChunksValidity)[i] == 0b11111111)
  6943. continue;
  6944. for (int j = 0; j < CHAR_BIT; j++) {
  6945. if (((((*bufferChunksValidity)[i] >> j) & 1) == 0)) {
  6946. *chunk_offset = (i * CHAR_BIT) + j;
  6947. goto done;
  6948. }
  6949. }
  6950. }
  6951. done:
  6952. // latest chunk was full, so need to create a new one
  6953. if (*chunk_offset == -1) {
  6954. rc = vec0_new_chunk(p, partitionKeyValues, chunk_rowid);
  6955. if (rc != SQLITE_OK) {
  6956. // IMP: V08441_25279
  6957. vtab_set_error(&p->base,
  6958. VEC_INTERAL_ERROR "Could not insert a new vector chunk");
  6959. rc = SQLITE_ERROR; // otherwise raises a DatabaseError and not operational
  6960. // error?
  6961. goto cleanup;
  6962. }
  6963. *chunk_offset = 0;
  6964. // blobChunksValidity and pValidity are stale, pointing to the previous
  6965. // (full) chunk. to re-assign them
  6966. rc = sqlite3_blob_close(*blobChunksValidity);
  6967. sqlite3_free((void *)*bufferChunksValidity);
  6968. *blobChunksValidity = NULL;
  6969. *bufferChunksValidity = NULL;
  6970. if (rc != SQLITE_OK) {
  6971. vtab_set_error(&p->base, VEC_INTERAL_ERROR
  6972. "unknown error, blobChunksValidity could not be closed, "
  6973. "please file an issue.");
  6974. rc = SQLITE_ERROR;
  6975. goto cleanup;
  6976. }
  6977. rc = sqlite3_blob_open(p->db, p->schemaName, p->shadowChunksName,
  6978. "validity", *chunk_rowid, 1, blobChunksValidity);
  6979. if (rc != SQLITE_OK) {
  6980. vtab_set_error(
  6981. &p->base,
  6982. VEC_INTERAL_ERROR
  6983. "Could not open validity blob for newly created chunk %s.%s.%lld",
  6984. p->schemaName, p->shadowChunksName, *chunk_rowid);
  6985. goto cleanup;
  6986. }
  6987. validitySize = sqlite3_blob_bytes(*blobChunksValidity);
  6988. if (validitySize != p->chunk_size / CHAR_BIT) {
  6989. vtab_set_error(&p->base,
  6990. VEC_INTERAL_ERROR
  6991. "validity blob size mismatch for newly created chunk "
  6992. "%s.%s.%lld. Exepcted %lld, got %lld",
  6993. p->schemaName, p->shadowChunksName, *chunk_rowid,
  6994. p->chunk_size / CHAR_BIT, validitySize);
  6995. goto cleanup;
  6996. }
  6997. *bufferChunksValidity = sqlite3_malloc(validitySize);
  6998. rc = sqlite3_blob_read(*blobChunksValidity, (void *)*bufferChunksValidity,
  6999. validitySize, 0);
  7000. if (rc != SQLITE_OK) {
  7001. vtab_set_error(&p->base,
  7002. VEC_INTERAL_ERROR
  7003. "could not read validity blob newly created chunk "
  7004. "%s.%s.%lld",
  7005. p->schemaName, p->shadowChunksName, *chunk_rowid);
  7006. goto cleanup;
  7007. }
  7008. }
  7009. rc = SQLITE_OK;
  7010. cleanup:
  7011. return rc;
  7012. }
  7013. /**
  7014. * @brief Write the vector data into the provided vector blob at the given
  7015. * offset
  7016. *
  7017. * @param blobVectors SQLite BLOB to write to
  7018. * @param chunk_offset the "offset" (ie validity bitmap position) to write the
  7019. * vector to
  7020. * @param bVector pointer to the vector containing data
  7021. * @param dimensions how many dimensions the vector has
  7022. * @param element_type the vector type
  7023. * @return result of sqlite3_blob_write, SQLITE_OK on success, otherwise failure
  7024. */
  7025. static int
  7026. vec0_write_vector_to_vector_blob(sqlite3_blob *blobVectors, i64 chunk_offset,
  7027. const void *bVector, size_t dimensions,
  7028. enum VectorElementType element_type) {
  7029. int n;
  7030. int offset;
  7031. switch (element_type) {
  7032. case SQLITE_VEC_ELEMENT_TYPE_FLOAT32:
  7033. n = dimensions * sizeof(f32);
  7034. offset = chunk_offset * dimensions * sizeof(f32);
  7035. break;
  7036. case SQLITE_VEC_ELEMENT_TYPE_INT8:
  7037. n = dimensions * sizeof(i8);
  7038. offset = chunk_offset * dimensions * sizeof(i8);
  7039. break;
  7040. case SQLITE_VEC_ELEMENT_TYPE_BIT:
  7041. n = dimensions / CHAR_BIT;
  7042. offset = chunk_offset * dimensions / CHAR_BIT;
  7043. break;
  7044. }
  7045. return sqlite3_blob_write(blobVectors, bVector, n, offset);
  7046. }
  7047. /**
  7048. * @brief
  7049. *
  7050. * @param p vec0 virtual table
  7051. * @param chunk_rowid: which chunk to write to
  7052. * @param chunk_offset: the offset inside the chunk to write the vector to.
  7053. * @param rowid: the rowid of the inserting row
  7054. * @param vectorDatas: array of the vector data to insert
  7055. * @param blobValidity: writeable validity blob of the row's assigned chunk.
  7056. * @param validity: snapshot buffer of the valdity column from the row's
  7057. * assigned chunk.
  7058. * @return int SQLITE_OK on success, error code on failure
  7059. */
  7060. int vec0Update_InsertWriteFinalStep(vec0_vtab *p, i64 chunk_rowid,
  7061. i64 chunk_offset, i64 rowid,
  7062. void *vectorDatas[],
  7063. sqlite3_blob *blobChunksValidity,
  7064. const unsigned char *bufferChunksValidity) {
  7065. int rc, brc;
  7066. sqlite3_blob *blobChunksRowids = NULL;
  7067. // mark the validity bit for this row in the chunk's validity bitmap
  7068. // Get the byte offset of the bitmap
  7069. char unsigned bx = bufferChunksValidity[chunk_offset / CHAR_BIT];
  7070. // set the bit at the chunk_offset position inside that byte
  7071. bx = bx | (1 << (chunk_offset % CHAR_BIT));
  7072. // write that 1 byte
  7073. rc = sqlite3_blob_write(blobChunksValidity, &bx, 1, chunk_offset / CHAR_BIT);
  7074. if (rc != SQLITE_OK) {
  7075. vtab_set_error(&p->base, VEC_INTERAL_ERROR "could not mark validity bit ");
  7076. return rc;
  7077. }
  7078. // Go insert the vector data into the vector chunk shadow tables
  7079. for (int i = 0; i < p->numVectorColumns; i++) {
  7080. sqlite3_blob *blobVectors;
  7081. rc = sqlite3_blob_open(p->db, p->schemaName, p->shadowVectorChunksNames[i],
  7082. "vectors", chunk_rowid, 1, &blobVectors);
  7083. if (rc != SQLITE_OK) {
  7084. vtab_set_error(&p->base, "Error opening vector blob at %s.%s.%lld",
  7085. p->schemaName, p->shadowVectorChunksNames[i], chunk_rowid);
  7086. goto cleanup;
  7087. }
  7088. i64 expected =
  7089. p->chunk_size * vector_column_byte_size(p->vector_columns[i]);
  7090. i64 actual = sqlite3_blob_bytes(blobVectors);
  7091. if (actual != expected) {
  7092. // IMP: V16386_00456
  7093. vtab_set_error(
  7094. &p->base,
  7095. VEC_INTERAL_ERROR
  7096. "vector blob size mismatch on %s.%s.%lld. Expected %lld, actual %lld",
  7097. p->schemaName, p->shadowVectorChunksNames[i], chunk_rowid, expected,
  7098. actual);
  7099. rc = SQLITE_ERROR;
  7100. // already error, can ignore result code
  7101. sqlite3_blob_close(blobVectors);
  7102. goto cleanup;
  7103. };
  7104. rc = vec0_write_vector_to_vector_blob(
  7105. blobVectors, chunk_offset, vectorDatas[i],
  7106. p->vector_columns[i].dimensions, p->vector_columns[i].element_type);
  7107. if (rc != SQLITE_OK) {
  7108. vtab_set_error(&p->base,
  7109. VEC_INTERAL_ERROR
  7110. "could not write vector blob on %s.%s.%lld",
  7111. p->schemaName, p->shadowVectorChunksNames[i], chunk_rowid);
  7112. rc = SQLITE_ERROR;
  7113. // already error, can ignore result code
  7114. sqlite3_blob_close(blobVectors);
  7115. goto cleanup;
  7116. }
  7117. rc = sqlite3_blob_close(blobVectors);
  7118. if (rc != SQLITE_OK) {
  7119. vtab_set_error(&p->base,
  7120. VEC_INTERAL_ERROR
  7121. "could not close vector blob on %s.%s.%lld",
  7122. p->schemaName, p->shadowVectorChunksNames[i], chunk_rowid);
  7123. rc = SQLITE_ERROR;
  7124. goto cleanup;
  7125. }
  7126. }
  7127. // write the new rowid to the rowids column of the _chunks table
  7128. rc = sqlite3_blob_open(p->db, p->schemaName, p->shadowChunksName, "rowids",
  7129. chunk_rowid, 1, &blobChunksRowids);
  7130. if (rc != SQLITE_OK) {
  7131. // IMP: V09221_26060
  7132. vtab_set_error(&p->base,
  7133. VEC_INTERAL_ERROR "could not open rowids blob on %s.%s.%lld",
  7134. p->schemaName, p->shadowChunksName, chunk_rowid);
  7135. goto cleanup;
  7136. }
  7137. i64 expected = p->chunk_size * sizeof(i64);
  7138. i64 actual = sqlite3_blob_bytes(blobChunksRowids);
  7139. if (expected != actual) {
  7140. // IMP: V12779_29618
  7141. vtab_set_error(
  7142. &p->base,
  7143. VEC_INTERAL_ERROR
  7144. "rowids blob size mismatch on %s.%s.%lld. Expected %lld, actual %lld",
  7145. p->schemaName, p->shadowChunksName, chunk_rowid, expected, actual);
  7146. rc = SQLITE_ERROR;
  7147. goto cleanup;
  7148. }
  7149. rc = sqlite3_blob_write(blobChunksRowids, &rowid, sizeof(i64),
  7150. chunk_offset * sizeof(i64));
  7151. if (rc != SQLITE_OK) {
  7152. vtab_set_error(
  7153. &p->base, VEC_INTERAL_ERROR "could not write rowids blob on %s.%s.%lld",
  7154. p->schemaName, p->shadowChunksName, chunk_rowid);
  7155. rc = SQLITE_ERROR;
  7156. goto cleanup;
  7157. }
  7158. // Now with all the vectors inserted, go back and update the _rowids table
  7159. // with the new chunk_rowid/chunk_offset values
  7160. rc = vec0_rowids_update_position(p, rowid, chunk_rowid, chunk_offset);
  7161. cleanup:
  7162. brc = sqlite3_blob_close(blobChunksRowids);
  7163. if ((rc == SQLITE_OK) && (brc != SQLITE_OK)) {
  7164. vtab_set_error(
  7165. &p->base, VEC_INTERAL_ERROR "could not close rowids blob on %s.%s.%lld",
  7166. p->schemaName, p->shadowChunksName, chunk_rowid);
  7167. return brc;
  7168. }
  7169. return rc;
  7170. }
  7171. int vec0_write_metadata_value(vec0_vtab *p, int metadata_column_idx, i64 rowid, i64 chunk_id, i64 chunk_offset, sqlite3_value * v, int isupdate) {
  7172. int rc;
  7173. struct Vec0MetadataColumnDefinition * metadata_column = &p->metadata_columns[metadata_column_idx];
  7174. vec0_metadata_column_kind kind = metadata_column->kind;
  7175. // verify input value matches column type
  7176. switch(kind) {
  7177. case VEC0_METADATA_COLUMN_KIND_BOOLEAN: {
  7178. if(sqlite3_value_type(v) != SQLITE_INTEGER || ((sqlite3_value_int(v) != 0) && (sqlite3_value_int(v) != 1))) {
  7179. rc = SQLITE_ERROR;
  7180. vtab_set_error(&p->base, "Expected 0 or 1 for BOOLEAN metadata column %.*s", metadata_column->name_length, metadata_column->name);
  7181. goto done;
  7182. }
  7183. break;
  7184. }
  7185. case VEC0_METADATA_COLUMN_KIND_INTEGER: {
  7186. if(sqlite3_value_type(v) != SQLITE_INTEGER) {
  7187. rc = SQLITE_ERROR;
  7188. vtab_set_error(&p->base, "Expected integer for INTEGER metadata column %.*s, received %s", metadata_column->name_length, metadata_column->name, type_name(sqlite3_value_type(v)));
  7189. goto done;
  7190. }
  7191. break;
  7192. }
  7193. case VEC0_METADATA_COLUMN_KIND_FLOAT: {
  7194. if(sqlite3_value_type(v) != SQLITE_FLOAT) {
  7195. rc = SQLITE_ERROR;
  7196. vtab_set_error(&p->base, "Expected float for FLOAT metadata column %.*s, received %s", metadata_column->name_length, metadata_column->name, type_name(sqlite3_value_type(v)));
  7197. goto done;
  7198. }
  7199. break;
  7200. }
  7201. case VEC0_METADATA_COLUMN_KIND_TEXT: {
  7202. if(sqlite3_value_type(v) != SQLITE_TEXT) {
  7203. rc = SQLITE_ERROR;
  7204. vtab_set_error(&p->base, "Expected text for TEXT metadata column %.*s, received %s", metadata_column->name_length, metadata_column->name, type_name(sqlite3_value_type(v)));
  7205. goto done;
  7206. }
  7207. break;
  7208. }
  7209. }
  7210. sqlite3_blob * blobValue = NULL;
  7211. rc = sqlite3_blob_open(p->db, p->schemaName, p->shadowMetadataChunksNames[metadata_column_idx], "data", chunk_id, 1, &blobValue);
  7212. if(rc != SQLITE_OK) {
  7213. goto done;
  7214. }
  7215. switch(kind) {
  7216. case VEC0_METADATA_COLUMN_KIND_BOOLEAN: {
  7217. u8 block;
  7218. int value = sqlite3_value_int(v);
  7219. rc = sqlite3_blob_read(blobValue, &block, sizeof(u8), (int) (chunk_offset / CHAR_BIT));
  7220. if(rc != SQLITE_OK) {
  7221. goto done;
  7222. }
  7223. if (value) {
  7224. block |= 1 << (chunk_offset % CHAR_BIT);
  7225. } else {
  7226. block &= ~(1 << (chunk_offset % CHAR_BIT));
  7227. }
  7228. rc = sqlite3_blob_write(blobValue, &block, sizeof(u8), chunk_offset / CHAR_BIT);
  7229. break;
  7230. }
  7231. case VEC0_METADATA_COLUMN_KIND_INTEGER: {
  7232. i64 value = sqlite3_value_int64(v);
  7233. rc = sqlite3_blob_write(blobValue, &value, sizeof(value), chunk_offset * sizeof(i64));
  7234. break;
  7235. }
  7236. case VEC0_METADATA_COLUMN_KIND_FLOAT: {
  7237. double value = sqlite3_value_double(v);
  7238. rc = sqlite3_blob_write(blobValue, &value, sizeof(value), chunk_offset * sizeof(double));
  7239. break;
  7240. }
  7241. case VEC0_METADATA_COLUMN_KIND_TEXT: {
  7242. int prev_n;
  7243. rc = sqlite3_blob_read(blobValue, &prev_n, sizeof(int), chunk_offset * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH);
  7244. if(rc != SQLITE_OK) {
  7245. goto done;
  7246. }
  7247. const char * s = (const char *) sqlite3_value_text(v);
  7248. int n = sqlite3_value_bytes(v);
  7249. u8 view[VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH];
  7250. memset(view, 0, VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH);
  7251. memcpy(view, &n, sizeof(int));
  7252. memcpy(view+4, s, min(n, VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH-4));
  7253. rc = sqlite3_blob_write(blobValue, &view, VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH, chunk_offset * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH);
  7254. if(n > VEC0_METADATA_TEXT_VIEW_DATA_LENGTH) {
  7255. const char * zSql;
  7256. if(isupdate && (prev_n > VEC0_METADATA_TEXT_VIEW_DATA_LENGTH)) {
  7257. zSql = sqlite3_mprintf("UPDATE " VEC0_SHADOW_METADATA_TEXT_DATA_NAME " SET data = ?2 WHERE rowid = ?1", p->schemaName, p->tableName, metadata_column_idx);
  7258. }else {
  7259. zSql = sqlite3_mprintf("INSERT INTO " VEC0_SHADOW_METADATA_TEXT_DATA_NAME " (rowid, data) VALUES (?1, ?2)", p->schemaName, p->tableName, metadata_column_idx);
  7260. }
  7261. if(!zSql) {
  7262. rc = SQLITE_NOMEM;
  7263. goto done;
  7264. }
  7265. sqlite3_stmt * stmt;
  7266. rc = sqlite3_prepare_v2(p->db, zSql, -1, &stmt, NULL);
  7267. if(rc != SQLITE_OK) {
  7268. goto done;
  7269. }
  7270. sqlite3_bind_int64(stmt, 1, rowid);
  7271. sqlite3_bind_text(stmt, 2, s, n, SQLITE_STATIC);
  7272. rc = sqlite3_step(stmt);
  7273. sqlite3_finalize(stmt);
  7274. if(rc != SQLITE_DONE) {
  7275. rc = SQLITE_ERROR;
  7276. goto done;
  7277. }
  7278. }
  7279. else if(prev_n > VEC0_METADATA_TEXT_VIEW_DATA_LENGTH) {
  7280. const char * zSql = sqlite3_mprintf("DELETE FROM " VEC0_SHADOW_METADATA_TEXT_DATA_NAME " WHERE rowid = ?", p->schemaName, p->tableName, metadata_column_idx);
  7281. if(!zSql) {
  7282. rc = SQLITE_NOMEM;
  7283. goto done;
  7284. }
  7285. sqlite3_stmt * stmt;
  7286. rc = sqlite3_prepare_v2(p->db, zSql, -1, &stmt, NULL);
  7287. if(rc != SQLITE_OK) {
  7288. goto done;
  7289. }
  7290. sqlite3_bind_int64(stmt, 1, rowid);
  7291. rc = sqlite3_step(stmt);
  7292. sqlite3_finalize(stmt);
  7293. if(rc != SQLITE_DONE) {
  7294. rc = SQLITE_ERROR;
  7295. goto done;
  7296. }
  7297. }
  7298. break;
  7299. }
  7300. }
  7301. if(rc != SQLITE_OK) {
  7302. }
  7303. rc = sqlite3_blob_close(blobValue);
  7304. if(rc != SQLITE_OK) {
  7305. goto done;
  7306. }
  7307. done:
  7308. return rc;
  7309. }
  7310. /**
  7311. * @brief Handles INSERT INTO operations on a vec0 table.
  7312. *
  7313. * @return int SQLITE_OK on success, otherwise error code on failure
  7314. */
  7315. int vec0Update_Insert(sqlite3_vtab *pVTab, int argc, sqlite3_value **argv,
  7316. sqlite_int64 *pRowid) {
  7317. UNUSED_PARAMETER(argc);
  7318. vec0_vtab *p = (vec0_vtab *)pVTab;
  7319. int rc;
  7320. // Rowid for the inserted row, deterimined by the inserted ID + _rowids shadow
  7321. // table
  7322. i64 rowid;
  7323. // Array to hold the vector data of the inserted row. Individual elements will
  7324. // have a lifetime bound to the argv[..] values.
  7325. void *vectorDatas[VEC0_MAX_VECTOR_COLUMNS];
  7326. // Array to hold cleanup functions for vectorDatas[]
  7327. vector_cleanup cleanups[VEC0_MAX_VECTOR_COLUMNS];
  7328. sqlite3_value * partitionKeyValues[VEC0_MAX_PARTITION_COLUMNS];
  7329. // Rowid of the chunk in the _chunks shadow table that the row will be a part
  7330. // of.
  7331. i64 chunk_rowid;
  7332. // offset within the chunk where the rowid belongs
  7333. i64 chunk_offset;
  7334. // a write-able blob of the validity column for the given chunk. Used to mark
  7335. // validity bit
  7336. sqlite3_blob *blobChunksValidity = NULL;
  7337. // buffer for the valididty column for the given chunk. Maybe not needed here?
  7338. const unsigned char *bufferChunksValidity = NULL;
  7339. int numReadVectors = 0;
  7340. // Read all provided partition key values into partitionKeyValues
  7341. for (int i = 0; i < vec0_num_defined_user_columns(p); i++) {
  7342. if(p->user_column_kinds[i] != SQLITE_VEC0_USER_COLUMN_KIND_PARTITION) {
  7343. continue;
  7344. }
  7345. int partition_key_idx = p->user_column_idxs[i];
  7346. partitionKeyValues[partition_key_idx] = argv[2+VEC0_COLUMN_USERN_START + i];
  7347. int new_value_type = sqlite3_value_type(partitionKeyValues[partition_key_idx]);
  7348. if((new_value_type != SQLITE_NULL) && (new_value_type != p->paritition_columns[partition_key_idx].type)) {
  7349. // IMP: V11454_28292
  7350. vtab_set_error(
  7351. pVTab,
  7352. "Parition key type mismatch: The partition key column %.*s has type %s, but %s was provided.",
  7353. p->paritition_columns[partition_key_idx].name_length,
  7354. p->paritition_columns[partition_key_idx].name,
  7355. type_name(p->paritition_columns[partition_key_idx].type),
  7356. type_name(new_value_type)
  7357. );
  7358. rc = SQLITE_ERROR;
  7359. goto cleanup;
  7360. }
  7361. }
  7362. // read all the inserted vectors into vectorDatas, validate their lengths.
  7363. for (int i = 0; i < vec0_num_defined_user_columns(p); i++) {
  7364. if(p->user_column_kinds[i] != SQLITE_VEC0_USER_COLUMN_KIND_VECTOR) {
  7365. continue;
  7366. }
  7367. int vector_column_idx = p->user_column_idxs[i];
  7368. sqlite3_value *valueVector = argv[2 + VEC0_COLUMN_USERN_START + i];
  7369. size_t dimensions;
  7370. char *pzError;
  7371. enum VectorElementType elementType;
  7372. rc = vector_from_value(valueVector, &vectorDatas[vector_column_idx], &dimensions,
  7373. &elementType, &cleanups[vector_column_idx], &pzError);
  7374. if (rc != SQLITE_OK) {
  7375. // IMP: V06519_23358
  7376. vtab_set_error(
  7377. pVTab, "Inserted vector for the \"%.*s\" column is invalid: %z",
  7378. p->vector_columns[vector_column_idx].name_length, p->vector_columns[vector_column_idx].name, pzError);
  7379. rc = SQLITE_ERROR;
  7380. goto cleanup;
  7381. }
  7382. numReadVectors++;
  7383. if (elementType != p->vector_columns[vector_column_idx].element_type) {
  7384. // IMP: V08221_25059
  7385. vtab_set_error(
  7386. pVTab,
  7387. "Inserted vector for the \"%.*s\" column is expected to be of type "
  7388. "%s, but a %s vector was provided.",
  7389. p->vector_columns[i].name_length, p->vector_columns[i].name,
  7390. vector_subtype_name(p->vector_columns[i].element_type),
  7391. vector_subtype_name(elementType));
  7392. rc = SQLITE_ERROR;
  7393. goto cleanup;
  7394. }
  7395. if (dimensions != p->vector_columns[vector_column_idx].dimensions) {
  7396. // IMP: V01145_17984
  7397. vtab_set_error(
  7398. pVTab,
  7399. "Dimension mismatch for inserted vector for the \"%.*s\" column. "
  7400. "Expected %d dimensions but received %d.",
  7401. p->vector_columns[vector_column_idx].name_length, p->vector_columns[vector_column_idx].name,
  7402. p->vector_columns[vector_column_idx].dimensions, dimensions);
  7403. rc = SQLITE_ERROR;
  7404. goto cleanup;
  7405. }
  7406. }
  7407. // Cannot insert a value in the hidden "distance" column
  7408. if (sqlite3_value_type(argv[2 + vec0_column_distance_idx(p)]) !=
  7409. SQLITE_NULL) {
  7410. // IMP: V24228_08298
  7411. vtab_set_error(pVTab,
  7412. "A value was provided for the hidden \"distance\" column.");
  7413. rc = SQLITE_ERROR;
  7414. goto cleanup;
  7415. }
  7416. // Cannot insert a value in the hidden "k" column
  7417. if (sqlite3_value_type(argv[2 + vec0_column_k_idx(p)]) != SQLITE_NULL) {
  7418. // IMP: V11875_28713
  7419. vtab_set_error(pVTab, "A value was provided for the hidden \"k\" column.");
  7420. rc = SQLITE_ERROR;
  7421. goto cleanup;
  7422. }
  7423. // Step #1: Insert/get a rowid for this row, from the _rowids table.
  7424. rc = vec0Update_InsertRowidStep(p, argv[2 + VEC0_COLUMN_ID], &rowid);
  7425. if (rc != SQLITE_OK) {
  7426. goto cleanup;
  7427. }
  7428. // Step #2: Find the next "available" position in the _chunks table for this
  7429. // row.
  7430. rc = vec0Update_InsertNextAvailableStep(p, partitionKeyValues,
  7431. &chunk_rowid, &chunk_offset,
  7432. &blobChunksValidity,
  7433. &bufferChunksValidity);
  7434. if (rc != SQLITE_OK) {
  7435. goto cleanup;
  7436. }
  7437. // Step #3: With the next available chunk position, write out all the vectors
  7438. // to their specified location.
  7439. rc = vec0Update_InsertWriteFinalStep(p, chunk_rowid, chunk_offset, rowid,
  7440. vectorDatas, blobChunksValidity,
  7441. bufferChunksValidity);
  7442. if (rc != SQLITE_OK) {
  7443. goto cleanup;
  7444. }
  7445. if(p->numAuxiliaryColumns > 0) {
  7446. sqlite3_stmt *stmt;
  7447. sqlite3_str * s = sqlite3_str_new(NULL);
  7448. sqlite3_str_appendf(s, "INSERT INTO " VEC0_SHADOW_AUXILIARY_NAME "(rowid ", p->schemaName, p->tableName);
  7449. for(int i = 0; i < p->numAuxiliaryColumns; i++) {
  7450. sqlite3_str_appendf(s, ", value%02d", i);
  7451. }
  7452. sqlite3_str_appendall(s, ") VALUES (? ");
  7453. for(int i = 0; i < p->numAuxiliaryColumns; i++) {
  7454. sqlite3_str_appendall(s, ", ?");
  7455. }
  7456. sqlite3_str_appendall(s, ")");
  7457. char * zSql = sqlite3_str_finish(s);
  7458. // TODO double check error handling ehre
  7459. if(!zSql) {
  7460. rc = SQLITE_NOMEM;
  7461. goto cleanup;
  7462. }
  7463. rc = sqlite3_prepare_v2(p->db, zSql, -1, &stmt, NULL);
  7464. if(rc != SQLITE_OK) {
  7465. goto cleanup;
  7466. }
  7467. sqlite3_bind_int64(stmt, 1, rowid);
  7468. for (int i = 0; i < vec0_num_defined_user_columns(p); i++) {
  7469. if(p->user_column_kinds[i] != SQLITE_VEC0_USER_COLUMN_KIND_AUXILIARY) {
  7470. continue;
  7471. }
  7472. int auxiliary_key_idx = p->user_column_idxs[i];
  7473. sqlite3_value * v = argv[2+VEC0_COLUMN_USERN_START + i];
  7474. int v_type = sqlite3_value_type(v);
  7475. if(v_type != SQLITE_NULL && (v_type != p->auxiliary_columns[auxiliary_key_idx].type)) {
  7476. sqlite3_finalize(stmt);
  7477. rc = SQLITE_CONSTRAINT;
  7478. vtab_set_error(
  7479. pVTab,
  7480. "Auxiliary column type mismatch: The auxiliary column %.*s has type %s, but %s was provided.",
  7481. p->auxiliary_columns[auxiliary_key_idx].name_length,
  7482. p->auxiliary_columns[auxiliary_key_idx].name,
  7483. type_name(p->auxiliary_columns[auxiliary_key_idx].type),
  7484. type_name(v_type)
  7485. );
  7486. goto cleanup;
  7487. }
  7488. // first 1 is for 1-based indexing on sqlite3_bind_*, second 1 is to account for initial rowid parameter
  7489. sqlite3_bind_value(stmt, 1 + 1 + auxiliary_key_idx, v);
  7490. }
  7491. rc = sqlite3_step(stmt);
  7492. if(rc != SQLITE_DONE) {
  7493. sqlite3_finalize(stmt);
  7494. rc = SQLITE_ERROR;
  7495. goto cleanup;
  7496. }
  7497. sqlite3_finalize(stmt);
  7498. }
  7499. for(int i = 0; i < vec0_num_defined_user_columns(p); i++) {
  7500. if(p->user_column_kinds[i] != SQLITE_VEC0_USER_COLUMN_KIND_METADATA) {
  7501. continue;
  7502. }
  7503. int metadata_idx = p->user_column_idxs[i];
  7504. sqlite3_value *v = argv[2 + VEC0_COLUMN_USERN_START + i];
  7505. rc = vec0_write_metadata_value(p, metadata_idx, rowid, chunk_rowid, chunk_offset, v, 0);
  7506. if(rc != SQLITE_OK) {
  7507. goto cleanup;
  7508. }
  7509. }
  7510. *pRowid = rowid;
  7511. rc = SQLITE_OK;
  7512. cleanup:
  7513. for (int i = 0; i < numReadVectors; i++) {
  7514. cleanups[i](vectorDatas[i]);
  7515. }
  7516. sqlite3_free((void *)bufferChunksValidity);
  7517. int brc = sqlite3_blob_close(blobChunksValidity);
  7518. if ((rc == SQLITE_OK) && (brc != SQLITE_OK)) {
  7519. vtab_set_error(&p->base,
  7520. VEC_INTERAL_ERROR "unknown error, blobChunksValidity could "
  7521. "not be closed, please file an issue");
  7522. return brc;
  7523. }
  7524. return rc;
  7525. }
  7526. int vec0Update_Delete_ClearValidity(vec0_vtab *p, i64 chunk_id,
  7527. u64 chunk_offset) {
  7528. int rc, brc;
  7529. sqlite3_blob *blobChunksValidity = NULL;
  7530. char unsigned bx;
  7531. int validityOffset = chunk_offset / CHAR_BIT;
  7532. // 2. ensure chunks.validity bit is 1, then set to 0
  7533. rc = sqlite3_blob_open(p->db, p->schemaName, p->shadowChunksName, "validity",
  7534. chunk_id, 1, &blobChunksValidity);
  7535. if (rc != SQLITE_OK) {
  7536. // IMP: V26002_10073
  7537. vtab_set_error(&p->base, "could not open validity blob for %s.%s.%lld",
  7538. p->schemaName, p->shadowChunksName, chunk_id);
  7539. return SQLITE_ERROR;
  7540. }
  7541. // will skip the sqlite3_blob_bytes(blobChunksValidity) check for now,
  7542. // the read below would catch it
  7543. rc = sqlite3_blob_read(blobChunksValidity, &bx, sizeof(bx), validityOffset);
  7544. if (rc != SQLITE_OK) {
  7545. // IMP: V21193_05263
  7546. vtab_set_error(
  7547. &p->base, "could not read validity blob for %s.%s.%lld at %d",
  7548. p->schemaName, p->shadowChunksName, chunk_id, validityOffset);
  7549. goto cleanup;
  7550. }
  7551. if (!(bx >> (chunk_offset % CHAR_BIT))) {
  7552. // IMP: V21193_05263
  7553. rc = SQLITE_ERROR;
  7554. vtab_set_error(
  7555. &p->base,
  7556. "vec0 deletion error: validity bit is not set for %s.%s.%lld at %d",
  7557. p->schemaName, p->shadowChunksName, chunk_id, validityOffset);
  7558. goto cleanup;
  7559. }
  7560. char unsigned mask = ~(1 << (chunk_offset % CHAR_BIT));
  7561. char result = bx & mask;
  7562. rc = sqlite3_blob_write(blobChunksValidity, &result, sizeof(bx),
  7563. validityOffset);
  7564. if (rc != SQLITE_OK) {
  7565. vtab_set_error(
  7566. &p->base, "could not write to validity blob for %s.%s.%lld at %d",
  7567. p->schemaName, p->shadowChunksName, chunk_id, validityOffset);
  7568. goto cleanup;
  7569. }
  7570. cleanup:
  7571. brc = sqlite3_blob_close(blobChunksValidity);
  7572. if (rc != SQLITE_OK)
  7573. return rc;
  7574. if (brc != SQLITE_OK) {
  7575. vtab_set_error(&p->base,
  7576. "vec0 deletion error: Error commiting validity blob "
  7577. "transaction on %s.%s.%lld at %d",
  7578. p->schemaName, p->shadowChunksName, chunk_id,
  7579. validityOffset);
  7580. return brc;
  7581. }
  7582. return SQLITE_OK;
  7583. }
  7584. int vec0Update_Delete_DeleteRowids(vec0_vtab *p, i64 rowid) {
  7585. int rc;
  7586. sqlite3_stmt *stmt = NULL;
  7587. char *zSql =
  7588. sqlite3_mprintf("DELETE FROM " VEC0_SHADOW_ROWIDS_NAME " WHERE rowid = ?",
  7589. p->schemaName, p->tableName);
  7590. if (!zSql) {
  7591. return SQLITE_NOMEM;
  7592. }
  7593. rc = sqlite3_prepare_v2(p->db, zSql, -1, &stmt, NULL);
  7594. sqlite3_free(zSql);
  7595. if (rc != SQLITE_OK) {
  7596. goto cleanup;
  7597. }
  7598. sqlite3_bind_int64(stmt, 1, rowid);
  7599. rc = sqlite3_step(stmt);
  7600. if (rc != SQLITE_DONE) {
  7601. goto cleanup;
  7602. }
  7603. rc = SQLITE_OK;
  7604. cleanup:
  7605. sqlite3_finalize(stmt);
  7606. return rc;
  7607. }
  7608. int vec0Update_Delete_DeleteAux(vec0_vtab *p, i64 rowid) {
  7609. int rc;
  7610. sqlite3_stmt *stmt = NULL;
  7611. char *zSql =
  7612. sqlite3_mprintf("DELETE FROM " VEC0_SHADOW_AUXILIARY_NAME " WHERE rowid = ?",
  7613. p->schemaName, p->tableName);
  7614. if (!zSql) {
  7615. return SQLITE_NOMEM;
  7616. }
  7617. rc = sqlite3_prepare_v2(p->db, zSql, -1, &stmt, NULL);
  7618. sqlite3_free(zSql);
  7619. if (rc != SQLITE_OK) {
  7620. goto cleanup;
  7621. }
  7622. sqlite3_bind_int64(stmt, 1, rowid);
  7623. rc = sqlite3_step(stmt);
  7624. if (rc != SQLITE_DONE) {
  7625. goto cleanup;
  7626. }
  7627. rc = SQLITE_OK;
  7628. cleanup:
  7629. sqlite3_finalize(stmt);
  7630. return rc;
  7631. }
  7632. int vec0Update_Delete_ClearMetadata(vec0_vtab *p, int metadata_idx, i64 rowid, i64 chunk_id,
  7633. u64 chunk_offset) {
  7634. int rc;
  7635. sqlite3_blob * blobValue;
  7636. vec0_metadata_column_kind kind = p->metadata_columns[metadata_idx].kind;
  7637. rc = sqlite3_blob_open(p->db, p->schemaName, p->shadowMetadataChunksNames[metadata_idx], "data", chunk_id, 1, &blobValue);
  7638. if(rc != SQLITE_OK) {
  7639. return rc;
  7640. }
  7641. switch(kind) {
  7642. case VEC0_METADATA_COLUMN_KIND_BOOLEAN: {
  7643. u8 block;
  7644. rc = sqlite3_blob_read(blobValue, &block, sizeof(u8), (int) (chunk_offset / CHAR_BIT));
  7645. if(rc != SQLITE_OK) {
  7646. goto done;
  7647. }
  7648. block &= ~(1 << (chunk_offset % CHAR_BIT));
  7649. rc = sqlite3_blob_write(blobValue, &block, sizeof(u8), chunk_offset / CHAR_BIT);
  7650. break;
  7651. }
  7652. case VEC0_METADATA_COLUMN_KIND_INTEGER: {
  7653. i64 v = 0;
  7654. rc = sqlite3_blob_write(blobValue, &v, sizeof(v), chunk_offset * sizeof(i64));
  7655. break;
  7656. }
  7657. case VEC0_METADATA_COLUMN_KIND_FLOAT: {
  7658. double v = 0;
  7659. rc = sqlite3_blob_write(blobValue, &v, sizeof(v), chunk_offset * sizeof(double));
  7660. break;
  7661. }
  7662. case VEC0_METADATA_COLUMN_KIND_TEXT: {
  7663. int n;
  7664. rc = sqlite3_blob_read(blobValue, &n, sizeof(int), chunk_offset * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH);
  7665. if(rc != SQLITE_OK) {
  7666. goto done;
  7667. }
  7668. u8 view[VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH];
  7669. memset(view, 0, VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH);
  7670. rc = sqlite3_blob_write(blobValue, &view, sizeof(view), chunk_offset * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH);
  7671. if(rc != SQLITE_OK) {
  7672. goto done;
  7673. }
  7674. if(n > VEC0_METADATA_TEXT_VIEW_DATA_LENGTH) {
  7675. const char * zSql = sqlite3_mprintf("DELETE FROM " VEC0_SHADOW_METADATA_TEXT_DATA_NAME " WHERE rowid = ?", p->schemaName, p->tableName, metadata_idx);
  7676. if(!zSql) {
  7677. rc = SQLITE_NOMEM;
  7678. goto done;
  7679. }
  7680. sqlite3_stmt * stmt;
  7681. rc = sqlite3_prepare_v2(p->db, zSql, -1, &stmt, NULL);
  7682. if(rc != SQLITE_OK) {
  7683. goto done;
  7684. }
  7685. sqlite3_bind_int64(stmt, 1, rowid);
  7686. rc = sqlite3_step(stmt);
  7687. if(rc != SQLITE_DONE) {
  7688. rc = SQLITE_ERROR;
  7689. goto done;
  7690. }
  7691. sqlite3_finalize(stmt);
  7692. }
  7693. break;
  7694. }
  7695. }
  7696. int rc2;
  7697. done:
  7698. rc2 = sqlite3_blob_close(blobValue);
  7699. if(rc == SQLITE_OK) {
  7700. return rc2;
  7701. }
  7702. return rc;
  7703. }
  7704. int vec0Update_Delete(sqlite3_vtab *pVTab, sqlite3_value *idValue) {
  7705. vec0_vtab *p = (vec0_vtab *)pVTab;
  7706. int rc;
  7707. i64 rowid;
  7708. i64 chunk_id;
  7709. i64 chunk_offset;
  7710. if (p->pkIsText) {
  7711. rc = vec0_rowid_from_id(p, idValue, &rowid);
  7712. if (rc != SQLITE_OK) {
  7713. return rc;
  7714. }
  7715. } else {
  7716. rowid = sqlite3_value_int64(idValue);
  7717. }
  7718. // 1. Find chunk position for given rowid
  7719. // 2. Ensure that validity bit for position is 1, then set to 0
  7720. // 3. Zero out rowid in chunks.rowid
  7721. // 4. Zero out vector data in all vector column chunks
  7722. // 5. Delete value in _rowids table
  7723. // 1. get chunk_id and chunk_offset from _rowids
  7724. rc = vec0_get_chunk_position(p, rowid, NULL, &chunk_id, &chunk_offset);
  7725. if (rc != SQLITE_OK) {
  7726. return rc;
  7727. }
  7728. rc = vec0Update_Delete_ClearValidity(p, chunk_id, chunk_offset);
  7729. if (rc != SQLITE_OK) {
  7730. return rc;
  7731. }
  7732. // 3. zero out rowid in chunks.rowids
  7733. // https://github.com/asg017/sqlite-vec/issues/54
  7734. // 4. zero out any data in vector chunks tables
  7735. // https://github.com/asg017/sqlite-vec/issues/54
  7736. // 5. delete from _rowids table
  7737. rc = vec0Update_Delete_DeleteRowids(p, rowid);
  7738. if (rc != SQLITE_OK) {
  7739. return rc;
  7740. }
  7741. // 6. delete any auxiliary rows
  7742. if(p->numAuxiliaryColumns > 0) {
  7743. rc = vec0Update_Delete_DeleteAux(p, rowid);
  7744. if (rc != SQLITE_OK) {
  7745. return rc;
  7746. }
  7747. }
  7748. // 6. delete metadata
  7749. for(int i = 0; i < p->numMetadataColumns; i++) {
  7750. rc = vec0Update_Delete_ClearMetadata(p, i, rowid, chunk_id, chunk_offset);
  7751. }
  7752. return SQLITE_OK;
  7753. }
  7754. int vec0Update_UpdateAuxColumn(vec0_vtab *p, int auxiliary_column_idx, sqlite3_value * value, i64 rowid) {
  7755. int rc;
  7756. sqlite3_stmt *stmt;
  7757. const char * zSql = sqlite3_mprintf("UPDATE " VEC0_SHADOW_AUXILIARY_NAME " SET value%02d = ? WHERE rowid = ?", p->schemaName, p->tableName, auxiliary_column_idx);
  7758. if(!zSql) {
  7759. return SQLITE_NOMEM;
  7760. }
  7761. rc = sqlite3_prepare_v2(p->db, zSql, -1, &stmt, NULL);
  7762. if(rc != SQLITE_OK) {
  7763. return rc;
  7764. }
  7765. sqlite3_bind_value(stmt, 1, value);
  7766. sqlite3_bind_int64(stmt, 2, rowid);
  7767. rc = sqlite3_step(stmt);
  7768. if(rc != SQLITE_DONE) {
  7769. sqlite3_finalize(stmt);
  7770. return SQLITE_ERROR;
  7771. }
  7772. sqlite3_finalize(stmt);
  7773. return SQLITE_OK;
  7774. }
  7775. int vec0Update_UpdateVectorColumn(vec0_vtab *p, i64 chunk_id, i64 chunk_offset,
  7776. int i, sqlite3_value *valueVector) {
  7777. int rc;
  7778. sqlite3_blob *blobVectors = NULL;
  7779. char *pzError;
  7780. size_t dimensions;
  7781. enum VectorElementType elementType;
  7782. void *vector;
  7783. vector_cleanup cleanup = vector_cleanup_noop;
  7784. // https://github.com/asg017/sqlite-vec/issues/53
  7785. rc = vector_from_value(valueVector, &vector, &dimensions, &elementType,
  7786. &cleanup, &pzError);
  7787. if (rc != SQLITE_OK) {
  7788. // IMP: V15203_32042
  7789. vtab_set_error(
  7790. &p->base, "Updated vector for the \"%.*s\" column is invalid: %z",
  7791. p->vector_columns[i].name_length, p->vector_columns[i].name, pzError);
  7792. rc = SQLITE_ERROR;
  7793. goto cleanup;
  7794. }
  7795. if (elementType != p->vector_columns[i].element_type) {
  7796. // IMP: V03643_20481
  7797. vtab_set_error(
  7798. &p->base,
  7799. "Updated vector for the \"%.*s\" column is expected to be of type "
  7800. "%s, but a %s vector was provided.",
  7801. p->vector_columns[i].name_length, p->vector_columns[i].name,
  7802. vector_subtype_name(p->vector_columns[i].element_type),
  7803. vector_subtype_name(elementType));
  7804. rc = SQLITE_ERROR;
  7805. goto cleanup;
  7806. }
  7807. if (dimensions != p->vector_columns[i].dimensions) {
  7808. // IMP: V25739_09810
  7809. vtab_set_error(
  7810. &p->base,
  7811. "Dimension mismatch for new updated vector for the \"%.*s\" column. "
  7812. "Expected %d dimensions but received %d.",
  7813. p->vector_columns[i].name_length, p->vector_columns[i].name,
  7814. p->vector_columns[i].dimensions, dimensions);
  7815. rc = SQLITE_ERROR;
  7816. goto cleanup;
  7817. }
  7818. rc = sqlite3_blob_open(p->db, p->schemaName, p->shadowVectorChunksNames[i],
  7819. "vectors", chunk_id, 1, &blobVectors);
  7820. if (rc != SQLITE_OK) {
  7821. vtab_set_error(&p->base, "Could not open vectors blob for %s.%s.%lld",
  7822. p->schemaName, p->shadowVectorChunksNames[i], chunk_id);
  7823. goto cleanup;
  7824. }
  7825. rc = vec0_write_vector_to_vector_blob(blobVectors, chunk_offset, vector,
  7826. p->vector_columns[i].dimensions,
  7827. p->vector_columns[i].element_type);
  7828. if (rc != SQLITE_OK) {
  7829. vtab_set_error(&p->base, "Could not write to vectors blob for %s.%s.%lld",
  7830. p->schemaName, p->shadowVectorChunksNames[i], chunk_id);
  7831. goto cleanup;
  7832. }
  7833. cleanup:
  7834. cleanup(vector);
  7835. int brc = sqlite3_blob_close(blobVectors);
  7836. if (rc != SQLITE_OK) {
  7837. return rc;
  7838. }
  7839. if (brc != SQLITE_OK) {
  7840. vtab_set_error(
  7841. &p->base,
  7842. "Could not commit blob transaction for vectors blob for %s.%s.%lld",
  7843. p->schemaName, p->shadowVectorChunksNames[i], chunk_id);
  7844. return brc;
  7845. }
  7846. return SQLITE_OK;
  7847. }
  7848. int vec0Update_Update(sqlite3_vtab *pVTab, int argc, sqlite3_value **argv) {
  7849. UNUSED_PARAMETER(argc);
  7850. vec0_vtab *p = (vec0_vtab *)pVTab;
  7851. int rc;
  7852. i64 chunk_id;
  7853. i64 chunk_offset;
  7854. i64 rowid;
  7855. if (p->pkIsText) {
  7856. const char *a = (const char *)sqlite3_value_text(argv[0]);
  7857. const char *b = (const char *)sqlite3_value_text(argv[1]);
  7858. // IMP: V08886_25725
  7859. if ((sqlite3_value_bytes(argv[0]) != sqlite3_value_bytes(argv[1])) ||
  7860. strncmp(a, b, sqlite3_value_bytes(argv[0])) != 0) {
  7861. vtab_set_error(pVTab,
  7862. "UPDATEs on vec0 primary key values are not allowed.");
  7863. return SQLITE_ERROR;
  7864. }
  7865. rc = vec0_rowid_from_id(p, argv[0], &rowid);
  7866. if (rc != SQLITE_OK) {
  7867. return rc;
  7868. }
  7869. } else {
  7870. rowid = sqlite3_value_int64(argv[0]);
  7871. }
  7872. // 1) get chunk_id and chunk_offset from _rowids
  7873. rc = vec0_get_chunk_position(p, rowid, NULL, &chunk_id, &chunk_offset);
  7874. if (rc != SQLITE_OK) {
  7875. return rc;
  7876. }
  7877. // 2) update any partition key values
  7878. for (int i = 0; i < vec0_num_defined_user_columns(p); i++) {
  7879. if(p->user_column_kinds[i] != SQLITE_VEC0_USER_COLUMN_KIND_PARTITION) {
  7880. continue;
  7881. }
  7882. sqlite3_value * value = argv[2+VEC0_COLUMN_USERN_START + i];
  7883. if(sqlite3_value_nochange(value)) {
  7884. continue;
  7885. }
  7886. vtab_set_error(pVTab, "UPDATE on partition key columns are not supported yet. ");
  7887. return SQLITE_ERROR;
  7888. }
  7889. // 3) handle auxiliary column updates
  7890. for (int i = 0; i < vec0_num_defined_user_columns(p); i++) {
  7891. if(p->user_column_kinds[i] != SQLITE_VEC0_USER_COLUMN_KIND_AUXILIARY) {
  7892. continue;
  7893. }
  7894. int auxiliary_column_idx = p->user_column_idxs[i];
  7895. sqlite3_value * value = argv[2+VEC0_COLUMN_USERN_START + i];
  7896. if(sqlite3_value_nochange(value)) {
  7897. continue;
  7898. }
  7899. rc = vec0Update_UpdateAuxColumn(p, auxiliary_column_idx, value, rowid);
  7900. if(rc != SQLITE_OK) {
  7901. return SQLITE_ERROR;
  7902. }
  7903. }
  7904. // 4) handle metadata column updates
  7905. for (int i = 0; i < vec0_num_defined_user_columns(p); i++) {
  7906. if(p->user_column_kinds[i] != SQLITE_VEC0_USER_COLUMN_KIND_METADATA) {
  7907. continue;
  7908. }
  7909. int metadata_column_idx = p->user_column_idxs[i];
  7910. sqlite3_value * value = argv[2+VEC0_COLUMN_USERN_START + i];
  7911. if(sqlite3_value_nochange(value)) {
  7912. continue;
  7913. }
  7914. rc = vec0_write_metadata_value(p, metadata_column_idx, rowid, chunk_id, chunk_offset, value, 1);
  7915. if(rc != SQLITE_OK) {
  7916. return rc;
  7917. }
  7918. }
  7919. // 5) iterate over all new vectors, update the vectors
  7920. for (int i = 0; i < vec0_num_defined_user_columns(p); i++) {
  7921. if(p->user_column_kinds[i] != SQLITE_VEC0_USER_COLUMN_KIND_VECTOR) {
  7922. continue;
  7923. }
  7924. int vector_idx = p->user_column_idxs[i];
  7925. sqlite3_value *valueVector = argv[2 + VEC0_COLUMN_USERN_START + i];
  7926. // in vec0Column, we check sqlite3_vtab_nochange() on vector columns.
  7927. // If the vector column isn't being changed, we return NULL;
  7928. // That's not great, that means vector columns can never be NULLABLE
  7929. // (bc we cant distinguish if an updated vector is truly NULL or nochange).
  7930. // Also it means that if someone tries to run `UPDATE v SET X = NULL`,
  7931. // we can't effectively detect and raise an error.
  7932. // A better solution would be to use a custom result_type for "empty",
  7933. // but subtypes don't appear to survive xColumn -> xUpdate, it's always 0.
  7934. // So for now, we'll just use NULL and warn people to not SET X = NULL
  7935. // in the docs.
  7936. if (sqlite3_value_type(valueVector) == SQLITE_NULL) {
  7937. continue;
  7938. }
  7939. rc = vec0Update_UpdateVectorColumn(p, chunk_id, chunk_offset, vector_idx,
  7940. valueVector);
  7941. if (rc != SQLITE_OK) {
  7942. return SQLITE_ERROR;
  7943. }
  7944. }
  7945. return SQLITE_OK;
  7946. }
  7947. static int vec0Update(sqlite3_vtab *pVTab, int argc, sqlite3_value **argv,
  7948. sqlite_int64 *pRowid) {
  7949. // DELETE operation
  7950. if (argc == 1 && sqlite3_value_type(argv[0]) != SQLITE_NULL) {
  7951. return vec0Update_Delete(pVTab, argv[0]);
  7952. }
  7953. // INSERT operation
  7954. else if (argc > 1 && sqlite3_value_type(argv[0]) == SQLITE_NULL) {
  7955. return vec0Update_Insert(pVTab, argc, argv, pRowid);
  7956. }
  7957. // UPDATE operation
  7958. else if (argc > 1 && sqlite3_value_type(argv[0]) != SQLITE_NULL) {
  7959. return vec0Update_Update(pVTab, argc, argv);
  7960. } else {
  7961. vtab_set_error(pVTab, "Unrecognized xUpdate operation provided for vec0.");
  7962. return SQLITE_ERROR;
  7963. }
  7964. }
  /**
   * xShadowName callback: reports whether `zName` is one of the shadow-table
   * name suffixes listed below.
   *
   * The comparison uses sqlite3_stricmp(), so it ignores ASCII case.
   * NOTE(review): the list has no vector-chunk entries; confirm whether that is
   * intentional.
   *
   * @return 1 if `zName` matches an entry, 0 otherwise.
   */
  static int vec0ShadowName(const char *zName) {
    static const char *azName[] = {
        "rowids", "chunks", "auxiliary", "info",
        // One entry per metadata column, up to VEC0_MAX_METADATA_COLUMNS
        // TODO be smarter about this man
        "metadatachunks00", "metadatachunks01", "metadatachunks02",
        "metadatachunks03", "metadatachunks04", "metadatachunks05",
        "metadatachunks06", "metadatachunks07", "metadatachunks08",
        "metadatachunks09", "metadatachunks10", "metadatachunks11",
        "metadatachunks12", "metadatachunks13", "metadatachunks14",
        "metadatachunks15",
        // Text-data tables, numbered the same way
        "metadatatext00", "metadatatext01", "metadatatext02",
        "metadatatext03", "metadatatext04", "metadatatext05",
        "metadatatext06", "metadatatext07", "metadatatext08",
        "metadatatext09", "metadatatext10", "metadatatext11",
        "metadatatext12", "metadatatext13", "metadatatext14",
        "metadatatext15",
    };
    const size_t nName = sizeof(azName) / sizeof(azName[0]);
    size_t idx = 0;

    while (idx < nName) {
      if (sqlite3_stricmp(zName, azName[idx]) == 0) {
        return 1;
      }
      idx++;
    }
    return 0;
  }
  8011. static int vec0Begin(sqlite3_vtab *pVTab) {
  8012. UNUSED_PARAMETER(pVTab);
  8013. return SQLITE_OK;
  8014. }
  8015. static int vec0Sync(sqlite3_vtab *pVTab) {
  8016. UNUSED_PARAMETER(pVTab);
  8017. vec0_vtab *p = (vec0_vtab *)pVTab;
  8018. if (p->stmtLatestChunk) {
  8019. sqlite3_finalize(p->stmtLatestChunk);
  8020. p->stmtLatestChunk = NULL;
  8021. }
  8022. if (p->stmtRowidsInsertRowid) {
  8023. sqlite3_finalize(p->stmtRowidsInsertRowid);
  8024. p->stmtRowidsInsertRowid = NULL;
  8025. }
  8026. if (p->stmtRowidsInsertId) {
  8027. sqlite3_finalize(p->stmtRowidsInsertId);
  8028. p->stmtRowidsInsertId = NULL;
  8029. }
  8030. if (p->stmtRowidsUpdatePosition) {
  8031. sqlite3_finalize(p->stmtRowidsUpdatePosition);
  8032. p->stmtRowidsUpdatePosition = NULL;
  8033. }
  8034. if (p->stmtRowidsGetChunkPosition) {
  8035. sqlite3_finalize(p->stmtRowidsGetChunkPosition);
  8036. p->stmtRowidsGetChunkPosition = NULL;
  8037. }
  8038. return SQLITE_OK;
  8039. }
  8040. static int vec0Commit(sqlite3_vtab *pVTab) {
  8041. UNUSED_PARAMETER(pVTab);
  8042. return SQLITE_OK;
  8043. }
  8044. static int vec0Rollback(sqlite3_vtab *pVTab) {
  8045. UNUSED_PARAMETER(pVTab);
  8046. return SQLITE_OK;
  8047. }
  8048. static sqlite3_module vec0Module = {
  8049. /* iVersion */ 3,
  8050. /* xCreate */ vec0Create,
  8051. /* xConnect */ vec0Connect,
  8052. /* xBestIndex */ vec0BestIndex,
  8053. /* xDisconnect */ vec0Disconnect,
  8054. /* xDestroy */ vec0Destroy,
  8055. /* xOpen */ vec0Open,
  8056. /* xClose */ vec0Close,
  8057. /* xFilter */ vec0Filter,
  8058. /* xNext */ vec0Next,
  8059. /* xEof */ vec0Eof,
  8060. /* xColumn */ vec0Column,
  8061. /* xRowid */ vec0Rowid,
  8062. /* xUpdate */ vec0Update,
  8063. /* xBegin */ vec0Begin,
  8064. /* xSync */ vec0Sync,
  8065. /* xCommit */ vec0Commit,
  8066. /* xRollback */ vec0Rollback,
  8067. /* xFindFunction */ 0,
  8068. /* xRename */ 0, // https://github.com/asg017/sqlite-vec/issues/43
  8069. /* xSavepoint */ 0,
  8070. /* xRelease */ 0,
  8071. /* xRollbackTo */ 0,
  8072. /* xShadowName */ vec0ShadowName,
  8073. #if SQLITE_VERSION_NUMBER >= 3044000
  8074. /* xIntegrity */ 0, // https://github.com/asg017/sqlite-vec/issues/44
  8075. #endif
  8076. };
  8077. #pragma endregion
  8078. static char *POINTER_NAME_STATIC_BLOB_DEF = "vec0-static_blob_def";
  8079. struct static_blob_definition {
  8080. void *p;
  8081. size_t dimensions;
  8082. size_t nvectors;
  8083. enum VectorElementType element_type;
  8084. };
  8085. static void vec_static_blob_from_raw(sqlite3_context *context, int argc,
  8086. sqlite3_value **argv) {
  8087. assert(argc == 4);
  8088. struct static_blob_definition *p;
  8089. p = sqlite3_malloc(sizeof(*p));
  8090. if (!p) {
  8091. sqlite3_result_error_nomem(context);
  8092. return;
  8093. }
  8094. memset(p, 0, sizeof(*p));
  8095. p->p = (void *)sqlite3_value_int64(argv[0]);
  8096. p->element_type = SQLITE_VEC_ELEMENT_TYPE_FLOAT32;
  8097. p->dimensions = sqlite3_value_int64(argv[2]);
  8098. p->nvectors = sqlite3_value_int64(argv[3]);
  8099. sqlite3_result_pointer(context, p, POINTER_NAME_STATIC_BLOB_DEF,
  8100. sqlite3_free);
  8101. }
  8102. #pragma region vec_static_blobs() table function
  8103. #define MAX_STATIC_BLOBS 16
  8104. typedef struct static_blob static_blob;
  8105. struct static_blob {
  8106. char *name;
  8107. void *p;
  8108. size_t dimensions;
  8109. size_t nvectors;
  8110. enum VectorElementType element_type;
  8111. };
  8112. typedef struct vec_static_blob_data vec_static_blob_data;
  8113. struct vec_static_blob_data {
  8114. static_blob static_blobs[MAX_STATIC_BLOBS];
  8115. };
  8116. typedef struct vec_static_blobs_vtab vec_static_blobs_vtab;
  8117. struct vec_static_blobs_vtab {
  8118. sqlite3_vtab base;
  8119. vec_static_blob_data *data;
  8120. };
  8121. typedef struct vec_static_blobs_cursor vec_static_blobs_cursor;
  8122. struct vec_static_blobs_cursor {
  8123. sqlite3_vtab_cursor base;
  8124. sqlite3_int64 iRowid;
  8125. };
  8126. static int vec_static_blobsConnect(sqlite3 *db, void *pAux, int argc,
  8127. const char *const *argv,
  8128. sqlite3_vtab **ppVtab, char **pzErr) {
  8129. UNUSED_PARAMETER(argc);
  8130. UNUSED_PARAMETER(argv);
  8131. UNUSED_PARAMETER(pzErr);
  8132. vec_static_blobs_vtab *pNew;
  8133. #define VEC_STATIC_BLOBS_NAME 0
  8134. #define VEC_STATIC_BLOBS_DATA 1
  8135. #define VEC_STATIC_BLOBS_DIMENSIONS 2
  8136. #define VEC_STATIC_BLOBS_COUNT 3
  8137. int rc = sqlite3_declare_vtab(
  8138. db, "CREATE TABLE x(name, data, dimensions hidden, count hidden)");
  8139. if (rc == SQLITE_OK) {
  8140. pNew = sqlite3_malloc(sizeof(*pNew));
  8141. *ppVtab = (sqlite3_vtab *)pNew;
  8142. if (pNew == 0)
  8143. return SQLITE_NOMEM;
  8144. memset(pNew, 0, sizeof(*pNew));
  8145. pNew->data = pAux;
  8146. }
  8147. return rc;
  8148. }
  8149. static int vec_static_blobsDisconnect(sqlite3_vtab *pVtab) {
  8150. vec_static_blobs_vtab *p = (vec_static_blobs_vtab *)pVtab;
  8151. sqlite3_free(p);
  8152. return SQLITE_OK;
  8153. }
  8154. static int vec_static_blobsUpdate(sqlite3_vtab *pVTab, int argc,
  8155. sqlite3_value **argv, sqlite_int64 *pRowid) {
  8156. UNUSED_PARAMETER(pRowid);
  8157. vec_static_blobs_vtab *p = (vec_static_blobs_vtab *)pVTab;
  8158. // DELETE operation
  8159. if (argc == 1 && sqlite3_value_type(argv[0]) != SQLITE_NULL) {
  8160. return SQLITE_ERROR;
  8161. }
  8162. // INSERT operation
  8163. else if (argc > 1 && sqlite3_value_type(argv[0]) == SQLITE_NULL) {
  8164. const char *key =
  8165. (const char *)sqlite3_value_text(argv[2 + VEC_STATIC_BLOBS_NAME]);
  8166. int idx = -1;
  8167. for (int i = 0; i < MAX_STATIC_BLOBS; i++) {
  8168. if (!p->data->static_blobs[i].name) {
  8169. p->data->static_blobs[i].name = sqlite3_mprintf("%s", key);
  8170. idx = i;
  8171. break;
  8172. }
  8173. }
  8174. if (idx < 0)
  8175. abort();
  8176. struct static_blob_definition *def = sqlite3_value_pointer(
  8177. argv[2 + VEC_STATIC_BLOBS_DATA], POINTER_NAME_STATIC_BLOB_DEF);
  8178. p->data->static_blobs[idx].p = def->p;
  8179. p->data->static_blobs[idx].dimensions = def->dimensions;
  8180. p->data->static_blobs[idx].nvectors = def->nvectors;
  8181. p->data->static_blobs[idx].element_type = def->element_type;
  8182. return SQLITE_OK;
  8183. }
  8184. // UPDATE operation
  8185. else if (argc > 1 && sqlite3_value_type(argv[0]) != SQLITE_NULL) {
  8186. return SQLITE_ERROR;
  8187. }
  8188. return SQLITE_ERROR;
  8189. }
  8190. static int vec_static_blobsOpen(sqlite3_vtab *p,
  8191. sqlite3_vtab_cursor **ppCursor) {
  8192. UNUSED_PARAMETER(p);
  8193. vec_static_blobs_cursor *pCur;
  8194. pCur = sqlite3_malloc(sizeof(*pCur));
  8195. if (pCur == 0)
  8196. return SQLITE_NOMEM;
  8197. memset(pCur, 0, sizeof(*pCur));
  8198. *ppCursor = &pCur->base;
  8199. return SQLITE_OK;
  8200. }
  8201. static int vec_static_blobsClose(sqlite3_vtab_cursor *cur) {
  8202. vec_static_blobs_cursor *pCur = (vec_static_blobs_cursor *)cur;
  8203. sqlite3_free(pCur);
  8204. return SQLITE_OK;
  8205. }
  8206. static int vec_static_blobsBestIndex(sqlite3_vtab *pVTab,
  8207. sqlite3_index_info *pIdxInfo) {
  8208. UNUSED_PARAMETER(pVTab);
  8209. pIdxInfo->idxNum = 1;
  8210. pIdxInfo->estimatedCost = (double)10;
  8211. pIdxInfo->estimatedRows = 10;
  8212. return SQLITE_OK;
  8213. }
  8214. static int vec_static_blobsNext(sqlite3_vtab_cursor *cur);
  8215. static int vec_static_blobsFilter(sqlite3_vtab_cursor *pVtabCursor, int idxNum,
  8216. const char *idxStr, int argc,
  8217. sqlite3_value **argv) {
  8218. UNUSED_PARAMETER(idxNum);
  8219. UNUSED_PARAMETER(idxStr);
  8220. UNUSED_PARAMETER(argc);
  8221. UNUSED_PARAMETER(argv);
  8222. vec_static_blobs_cursor *pCur = (vec_static_blobs_cursor *)pVtabCursor;
  8223. pCur->iRowid = -1;
  8224. vec_static_blobsNext(pVtabCursor);
  8225. return SQLITE_OK;
  8226. }
  8227. static int vec_static_blobsRowid(sqlite3_vtab_cursor *cur,
  8228. sqlite_int64 *pRowid) {
  8229. vec_static_blobs_cursor *pCur = (vec_static_blobs_cursor *)cur;
  8230. *pRowid = pCur->iRowid;
  8231. return SQLITE_OK;
  8232. }
  8233. static int vec_static_blobsNext(sqlite3_vtab_cursor *cur) {
  8234. vec_static_blobs_cursor *pCur = (vec_static_blobs_cursor *)cur;
  8235. vec_static_blobs_vtab *p = (vec_static_blobs_vtab *)pCur->base.pVtab;
  8236. pCur->iRowid++;
  8237. while (pCur->iRowid < MAX_STATIC_BLOBS) {
  8238. if (p->data->static_blobs[pCur->iRowid].name) {
  8239. return SQLITE_OK;
  8240. }
  8241. pCur->iRowid++;
  8242. }
  8243. return SQLITE_OK;
  8244. }
  8245. static int vec_static_blobsEof(sqlite3_vtab_cursor *cur) {
  8246. vec_static_blobs_cursor *pCur = (vec_static_blobs_cursor *)cur;
  8247. return pCur->iRowid >= MAX_STATIC_BLOBS;
  8248. }
  8249. static int vec_static_blobsColumn(sqlite3_vtab_cursor *cur,
  8250. sqlite3_context *context, int i) {
  8251. vec_static_blobs_cursor *pCur = (vec_static_blobs_cursor *)cur;
  8252. vec_static_blobs_vtab *p = (vec_static_blobs_vtab *)cur->pVtab;
  8253. switch (i) {
  8254. case VEC_STATIC_BLOBS_NAME:
  8255. sqlite3_result_text(context, p->data->static_blobs[pCur->iRowid].name, -1,
  8256. SQLITE_TRANSIENT);
  8257. break;
  8258. case VEC_STATIC_BLOBS_DATA:
  8259. sqlite3_result_null(context);
  8260. break;
  8261. case VEC_STATIC_BLOBS_DIMENSIONS:
  8262. sqlite3_result_int64(context,
  8263. p->data->static_blobs[pCur->iRowid].dimensions);
  8264. break;
  8265. case VEC_STATIC_BLOBS_COUNT:
  8266. sqlite3_result_int64(context, p->data->static_blobs[pCur->iRowid].nvectors);
  8267. break;
  8268. }
  8269. return SQLITE_OK;
  8270. }
  8271. static sqlite3_module vec_static_blobsModule = {
  8272. /* iVersion */ 3,
  8273. /* xCreate */ 0,
  8274. /* xConnect */ vec_static_blobsConnect,
  8275. /* xBestIndex */ vec_static_blobsBestIndex,
  8276. /* xDisconnect */ vec_static_blobsDisconnect,
  8277. /* xDestroy */ 0,
  8278. /* xOpen */ vec_static_blobsOpen,
  8279. /* xClose */ vec_static_blobsClose,
  8280. /* xFilter */ vec_static_blobsFilter,
  8281. /* xNext */ vec_static_blobsNext,
  8282. /* xEof */ vec_static_blobsEof,
  8283. /* xColumn */ vec_static_blobsColumn,
  8284. /* xRowid */ vec_static_blobsRowid,
  8285. /* xUpdate */ vec_static_blobsUpdate,
  8286. /* xBegin */ 0,
  8287. /* xSync */ 0,
  8288. /* xCommit */ 0,
  8289. /* xRollback */ 0,
  8290. /* xFindMethod */ 0,
  8291. /* xRename */ 0,
  8292. /* xSavepoint */ 0,
  8293. /* xRelease */ 0,
  8294. /* xRollbackTo */ 0,
  8295. /* xShadowName */ 0,
  8296. #if SQLITE_VERSION_NUMBER >= 3044000
  8297. /* xIntegrity */ 0
  8298. #endif
  8299. };
  8300. #pragma endregion
  8301. #pragma region vec_static_blob_entries() table function
  8302. typedef struct vec_static_blob_entries_vtab vec_static_blob_entries_vtab;
  8303. struct vec_static_blob_entries_vtab {
  8304. sqlite3_vtab base;
  8305. static_blob *blob;
  8306. };
  /** Query plans of vec_static_blob_entries; the value is used as idxNum. */
  typedef enum {
    // No MATCH constraint: scan every vector.
    VEC_SBE__QUERYPLAN_FULLSCAN = 1,
    // MATCH on the vector column with a LIMIT or k constraint.
    VEC_SBE__QUERYPLAN_KNN = 2
  } vec_sbe_query_plan;
  8311. struct sbe_query_knn_data {
  8312. i64 k;
  8313. i64 k_used;
  8314. // Array of rowids of size k. Must be freed with sqlite3_free().
  8315. i32 *rowids;
  8316. // Array of distances of size k. Must be freed with sqlite3_free().
  8317. f32 *distances;
  8318. i64 current_idx;
  8319. };
  8320. void sbe_query_knn_data_clear(struct sbe_query_knn_data *knn_data) {
  8321. if (!knn_data)
  8322. return;
  8323. if (knn_data->rowids) {
  8324. sqlite3_free(knn_data->rowids);
  8325. knn_data->rowids = NULL;
  8326. }
  8327. if (knn_data->distances) {
  8328. sqlite3_free(knn_data->distances);
  8329. knn_data->distances = NULL;
  8330. }
  8331. }
  8332. typedef struct vec_static_blob_entries_cursor vec_static_blob_entries_cursor;
  8333. struct vec_static_blob_entries_cursor {
  8334. sqlite3_vtab_cursor base;
  8335. sqlite3_int64 iRowid;
  8336. vec_sbe_query_plan query_plan;
  8337. struct sbe_query_knn_data *knn_data;
  8338. };
  8339. static int vec_static_blob_entriesConnect(sqlite3 *db, void *pAux, int argc,
  8340. const char *const *argv,
  8341. sqlite3_vtab **ppVtab, char **pzErr) {
  8342. UNUSED_PARAMETER(argc);
  8343. UNUSED_PARAMETER(argv);
  8344. UNUSED_PARAMETER(pzErr);
  8345. vec_static_blob_data *blob_data = pAux;
  8346. int idx = -1;
  8347. for (int i = 0; i < MAX_STATIC_BLOBS; i++) {
  8348. if (!blob_data->static_blobs[i].name)
  8349. continue;
  8350. if (strncmp(blob_data->static_blobs[i].name, argv[3],
  8351. strlen(blob_data->static_blobs[i].name)) == 0) {
  8352. idx = i;
  8353. break;
  8354. }
  8355. }
  8356. if (idx < 0)
  8357. abort();
  8358. vec_static_blob_entries_vtab *pNew;
  8359. #define VEC_STATIC_BLOB_ENTRIES_VECTOR 0
  8360. #define VEC_STATIC_BLOB_ENTRIES_DISTANCE 1
  8361. #define VEC_STATIC_BLOB_ENTRIES_K 2
  8362. int rc = sqlite3_declare_vtab(
  8363. db, "CREATE TABLE x(vector, distance hidden, k hidden)");
  8364. if (rc == SQLITE_OK) {
  8365. pNew = sqlite3_malloc(sizeof(*pNew));
  8366. *ppVtab = (sqlite3_vtab *)pNew;
  8367. if (pNew == 0)
  8368. return SQLITE_NOMEM;
  8369. memset(pNew, 0, sizeof(*pNew));
  8370. pNew->blob = &blob_data->static_blobs[idx];
  8371. }
  8372. return rc;
  8373. }
  8374. static int vec_static_blob_entriesCreate(sqlite3 *db, void *pAux, int argc,
  8375. const char *const *argv,
  8376. sqlite3_vtab **ppVtab, char **pzErr) {
  8377. return vec_static_blob_entriesConnect(db, pAux, argc, argv, ppVtab, pzErr);
  8378. }
  8379. static int vec_static_blob_entriesDisconnect(sqlite3_vtab *pVtab) {
  8380. vec_static_blob_entries_vtab *p = (vec_static_blob_entries_vtab *)pVtab;
  8381. sqlite3_free(p);
  8382. return SQLITE_OK;
  8383. }
  8384. static int vec_static_blob_entriesOpen(sqlite3_vtab *p,
  8385. sqlite3_vtab_cursor **ppCursor) {
  8386. UNUSED_PARAMETER(p);
  8387. vec_static_blob_entries_cursor *pCur;
  8388. pCur = sqlite3_malloc(sizeof(*pCur));
  8389. if (pCur == 0)
  8390. return SQLITE_NOMEM;
  8391. memset(pCur, 0, sizeof(*pCur));
  8392. *ppCursor = &pCur->base;
  8393. return SQLITE_OK;
  8394. }
  8395. static int vec_static_blob_entriesClose(sqlite3_vtab_cursor *cur) {
  8396. vec_static_blob_entries_cursor *pCur = (vec_static_blob_entries_cursor *)cur;
  8397. sqlite3_free(pCur->knn_data);
  8398. sqlite3_free(pCur);
  8399. return SQLITE_OK;
  8400. }
  8401. static int vec_static_blob_entriesBestIndex(sqlite3_vtab *pVTab,
  8402. sqlite3_index_info *pIdxInfo) {
  8403. vec_static_blob_entries_vtab *p = (vec_static_blob_entries_vtab *)pVTab;
  8404. int iMatchTerm = -1;
  8405. int iLimitTerm = -1;
  8406. // int iRowidTerm = -1; // https://github.com/asg017/sqlite-vec/issues/47
  8407. int iKTerm = -1;
  8408. for (int i = 0; i < pIdxInfo->nConstraint; i++) {
  8409. if (!pIdxInfo->aConstraint[i].usable)
  8410. continue;
  8411. int iColumn = pIdxInfo->aConstraint[i].iColumn;
  8412. int op = pIdxInfo->aConstraint[i].op;
  8413. if (op == SQLITE_INDEX_CONSTRAINT_MATCH &&
  8414. iColumn == VEC_STATIC_BLOB_ENTRIES_VECTOR) {
  8415. if (iMatchTerm > -1) {
  8416. // https://github.com/asg017/sqlite-vec/issues/51
  8417. return SQLITE_ERROR;
  8418. }
  8419. iMatchTerm = i;
  8420. }
  8421. if (op == SQLITE_INDEX_CONSTRAINT_LIMIT) {
  8422. iLimitTerm = i;
  8423. }
  8424. if (op == SQLITE_INDEX_CONSTRAINT_EQ &&
  8425. iColumn == VEC_STATIC_BLOB_ENTRIES_K) {
  8426. iKTerm = i;
  8427. }
  8428. }
  8429. if (iMatchTerm >= 0) {
  8430. if (iLimitTerm < 0 && iKTerm < 0) {
  8431. // https://github.com/asg017/sqlite-vec/issues/51
  8432. return SQLITE_ERROR;
  8433. }
  8434. if (iLimitTerm >= 0 && iKTerm >= 0) {
  8435. return SQLITE_ERROR; // limit or k, not both
  8436. }
  8437. if (pIdxInfo->nOrderBy < 1) {
  8438. vtab_set_error(pVTab, "ORDER BY distance required");
  8439. return SQLITE_CONSTRAINT;
  8440. }
  8441. if (pIdxInfo->nOrderBy > 1) {
  8442. // https://github.com/asg017/sqlite-vec/issues/51
  8443. vtab_set_error(pVTab, "more than 1 ORDER BY clause provided");
  8444. return SQLITE_CONSTRAINT;
  8445. }
  8446. if (pIdxInfo->aOrderBy[0].iColumn != VEC_STATIC_BLOB_ENTRIES_DISTANCE) {
  8447. vtab_set_error(pVTab, "ORDER BY must be on the distance column");
  8448. return SQLITE_CONSTRAINT;
  8449. }
  8450. if (pIdxInfo->aOrderBy[0].desc) {
  8451. vtab_set_error(pVTab,
  8452. "Only ascending in ORDER BY distance clause is supported, "
  8453. "DESC is not supported yet.");
  8454. return SQLITE_CONSTRAINT;
  8455. }
  8456. pIdxInfo->idxNum = VEC_SBE__QUERYPLAN_KNN;
  8457. pIdxInfo->estimatedCost = (double)10;
  8458. pIdxInfo->estimatedRows = 10;
  8459. pIdxInfo->orderByConsumed = 1;
  8460. pIdxInfo->aConstraintUsage[iMatchTerm].argvIndex = 1;
  8461. pIdxInfo->aConstraintUsage[iMatchTerm].omit = 1;
  8462. if (iLimitTerm >= 0) {
  8463. pIdxInfo->aConstraintUsage[iLimitTerm].argvIndex = 2;
  8464. pIdxInfo->aConstraintUsage[iLimitTerm].omit = 1;
  8465. } else {
  8466. pIdxInfo->aConstraintUsage[iKTerm].argvIndex = 2;
  8467. pIdxInfo->aConstraintUsage[iKTerm].omit = 1;
  8468. }
  8469. } else {
  8470. pIdxInfo->idxNum = VEC_SBE__QUERYPLAN_FULLSCAN;
  8471. pIdxInfo->estimatedCost = (double)p->blob->nvectors;
  8472. pIdxInfo->estimatedRows = p->blob->nvectors;
  8473. }
  8474. return SQLITE_OK;
  8475. }
  8476. static int vec_static_blob_entriesFilter(sqlite3_vtab_cursor *pVtabCursor,
  8477. int idxNum, const char *idxStr,
  8478. int argc, sqlite3_value **argv) {
  8479. UNUSED_PARAMETER(idxStr);
  8480. assert(argc >= 0 && argc <= 3);
  8481. vec_static_blob_entries_cursor *pCur =
  8482. (vec_static_blob_entries_cursor *)pVtabCursor;
  8483. vec_static_blob_entries_vtab *p =
  8484. (vec_static_blob_entries_vtab *)pCur->base.pVtab;
  8485. if (idxNum == VEC_SBE__QUERYPLAN_KNN) {
  8486. assert(argc == 2);
  8487. pCur->query_plan = VEC_SBE__QUERYPLAN_KNN;
  8488. struct sbe_query_knn_data *knn_data;
  8489. knn_data = sqlite3_malloc(sizeof(*knn_data));
  8490. if (!knn_data) {
  8491. return SQLITE_NOMEM;
  8492. }
  8493. memset(knn_data, 0, sizeof(*knn_data));
  8494. void *queryVector;
  8495. size_t dimensions;
  8496. enum VectorElementType elementType;
  8497. vector_cleanup cleanup;
  8498. char *err;
  8499. int rc = vector_from_value(argv[0], &queryVector, &dimensions, &elementType,
  8500. &cleanup, &err);
  8501. if (rc != SQLITE_OK) {
  8502. return SQLITE_ERROR;
  8503. }
  8504. if (elementType != p->blob->element_type) {
  8505. return SQLITE_ERROR;
  8506. }
  8507. if (dimensions != p->blob->dimensions) {
  8508. return SQLITE_ERROR;
  8509. }
  8510. i64 k = min(sqlite3_value_int64(argv[1]), (i64)p->blob->nvectors);
  8511. if (k < 0) {
  8512. // HANDLE https://github.com/asg017/sqlite-vec/issues/55
  8513. return SQLITE_ERROR;
  8514. }
  8515. if (k == 0) {
  8516. knn_data->k = 0;
  8517. pCur->knn_data = knn_data;
  8518. return SQLITE_OK;
  8519. }
  8520. size_t bsize = (p->blob->nvectors + 7) & ~7;
  8521. i32 *topk_rowids = sqlite3_malloc(k * sizeof(i32));
  8522. if (!topk_rowids) {
  8523. // HANDLE https://github.com/asg017/sqlite-vec/issues/55
  8524. return SQLITE_ERROR;
  8525. }
  8526. f32 *distances = sqlite3_malloc(bsize * sizeof(f32));
  8527. if (!distances) {
  8528. // HANDLE https://github.com/asg017/sqlite-vec/issues/55
  8529. return SQLITE_ERROR;
  8530. }
  8531. for (size_t i = 0; i < p->blob->nvectors; i++) {
  8532. // https://github.com/asg017/sqlite-vec/issues/52
  8533. float *v = ((float *)p->blob->p) + (i * p->blob->dimensions);
  8534. distances[i] =
  8535. distance_l2_sqr_float(v, (float *)queryVector, &p->blob->dimensions);
  8536. }
  8537. u8 *candidates = bitmap_new(bsize);
  8538. assert(candidates);
  8539. u8 *taken = bitmap_new(bsize);
  8540. assert(taken);
  8541. bitmap_fill(candidates, bsize);
  8542. for (size_t i = bsize; i >= p->blob->nvectors; i--) {
  8543. bitmap_set(candidates, i, 0);
  8544. }
  8545. i32 k_used = 0;
  8546. min_idx(distances, bsize, candidates, topk_rowids, k, taken, &k_used);
  8547. knn_data->current_idx = 0;
  8548. knn_data->distances = distances;
  8549. knn_data->k = k;
  8550. knn_data->rowids = topk_rowids;
  8551. pCur->knn_data = knn_data;
  8552. } else {
  8553. pCur->query_plan = VEC_SBE__QUERYPLAN_FULLSCAN;
  8554. pCur->iRowid = 0;
  8555. }
  8556. return SQLITE_OK;
  8557. }
  8558. static int vec_static_blob_entriesRowid(sqlite3_vtab_cursor *cur,
  8559. sqlite_int64 *pRowid) {
  8560. vec_static_blob_entries_cursor *pCur = (vec_static_blob_entries_cursor *)cur;
  8561. switch (pCur->query_plan) {
  8562. case VEC_SBE__QUERYPLAN_FULLSCAN: {
  8563. *pRowid = pCur->iRowid;
  8564. return SQLITE_OK;
  8565. }
  8566. case VEC_SBE__QUERYPLAN_KNN: {
  8567. i32 rowid = ((i32 *)pCur->knn_data->rowids)[pCur->knn_data->current_idx];
  8568. *pRowid = (sqlite3_int64)rowid;
  8569. return SQLITE_OK;
  8570. }
  8571. }
  8572. return SQLITE_ERROR;
  8573. }
  8574. static int vec_static_blob_entriesNext(sqlite3_vtab_cursor *cur) {
  8575. vec_static_blob_entries_cursor *pCur = (vec_static_blob_entries_cursor *)cur;
  8576. switch (pCur->query_plan) {
  8577. case VEC_SBE__QUERYPLAN_FULLSCAN: {
  8578. pCur->iRowid++;
  8579. return SQLITE_OK;
  8580. }
  8581. case VEC_SBE__QUERYPLAN_KNN: {
  8582. pCur->knn_data->current_idx++;
  8583. return SQLITE_OK;
  8584. }
  8585. }
  8586. return SQLITE_ERROR;
  8587. }
  8588. static int vec_static_blob_entriesEof(sqlite3_vtab_cursor *cur) {
  8589. vec_static_blob_entries_cursor *pCur = (vec_static_blob_entries_cursor *)cur;
  8590. vec_static_blob_entries_vtab *p =
  8591. (vec_static_blob_entries_vtab *)pCur->base.pVtab;
  8592. switch (pCur->query_plan) {
  8593. case VEC_SBE__QUERYPLAN_FULLSCAN: {
  8594. return (size_t)pCur->iRowid >= p->blob->nvectors;
  8595. }
  8596. case VEC_SBE__QUERYPLAN_KNN: {
  8597. return pCur->knn_data->current_idx >= pCur->knn_data->k;
  8598. }
  8599. }
  8600. return SQLITE_ERROR;
  8601. }
  8602. static int vec_static_blob_entriesColumn(sqlite3_vtab_cursor *cur,
  8603. sqlite3_context *context, int i) {
  8604. vec_static_blob_entries_cursor *pCur = (vec_static_blob_entries_cursor *)cur;
  8605. vec_static_blob_entries_vtab *p = (vec_static_blob_entries_vtab *)cur->pVtab;
  8606. switch (pCur->query_plan) {
  8607. case VEC_SBE__QUERYPLAN_FULLSCAN: {
  8608. switch (i) {
  8609. case VEC_STATIC_BLOB_ENTRIES_VECTOR:
  8610. sqlite3_result_blob(
  8611. context,
  8612. ((unsigned char *)p->blob->p) +
  8613. (pCur->iRowid * p->blob->dimensions * sizeof(float)),
  8614. p->blob->dimensions * sizeof(float), SQLITE_TRANSIENT);
  8615. sqlite3_result_subtype(context, p->blob->element_type);
  8616. break;
  8617. }
  8618. return SQLITE_OK;
  8619. }
  8620. case VEC_SBE__QUERYPLAN_KNN: {
  8621. switch (i) {
  8622. case VEC_STATIC_BLOB_ENTRIES_VECTOR: {
  8623. i32 rowid = ((i32 *)pCur->knn_data->rowids)[pCur->knn_data->current_idx];
  8624. sqlite3_result_blob(context,
  8625. ((unsigned char *)p->blob->p) +
  8626. (rowid * p->blob->dimensions * sizeof(float)),
  8627. p->blob->dimensions * sizeof(float),
  8628. SQLITE_TRANSIENT);
  8629. sqlite3_result_subtype(context, p->blob->element_type);
  8630. break;
  8631. }
  8632. }
  8633. return SQLITE_OK;
  8634. }
  8635. }
  8636. return SQLITE_ERROR;
  8637. }
  8638. static sqlite3_module vec_static_blob_entriesModule = {
  8639. /* iVersion */ 3,
  8640. /* xCreate */
  8641. vec_static_blob_entriesCreate, // handle rm?
  8642. // https://github.com/asg017/sqlite-vec/issues/55
  8643. /* xConnect */ vec_static_blob_entriesConnect,
  8644. /* xBestIndex */ vec_static_blob_entriesBestIndex,
  8645. /* xDisconnect */ vec_static_blob_entriesDisconnect,
  8646. /* xDestroy */ vec_static_blob_entriesDisconnect,
  8647. /* xOpen */ vec_static_blob_entriesOpen,
  8648. /* xClose */ vec_static_blob_entriesClose,
  8649. /* xFilter */ vec_static_blob_entriesFilter,
  8650. /* xNext */ vec_static_blob_entriesNext,
  8651. /* xEof */ vec_static_blob_entriesEof,
  8652. /* xColumn */ vec_static_blob_entriesColumn,
  8653. /* xRowid */ vec_static_blob_entriesRowid,
  8654. /* xUpdate */ 0,
  8655. /* xBegin */ 0,
  8656. /* xSync */ 0,
  8657. /* xCommit */ 0,
  8658. /* xRollback */ 0,
  8659. /* xFindMethod */ 0,
  8660. /* xRename */ 0,
  8661. /* xSavepoint */ 0,
  8662. /* xRelease */ 0,
  8663. /* xRollbackTo */ 0,
  8664. /* xShadowName */ 0,
  8665. #if SQLITE_VERSION_NUMBER >= 3044000
  8666. /* xIntegrity */ 0
  8667. #endif
  8668. };
  8669. #pragma endregion
  /*
   * Build-flag fragments for the vec_debug() report. Each expands to the
   * flag's name when the corresponding SQLITE_VEC_ENABLE_* macro is defined
   * and to the empty string otherwise.
   */
  #if defined(SQLITE_VEC_ENABLE_AVX)
  #define SQLITE_VEC_DEBUG_BUILD_AVX "avx"
  #else
  #define SQLITE_VEC_DEBUG_BUILD_AVX ""
  #endif

  #if defined(SQLITE_VEC_ENABLE_NEON)
  #define SQLITE_VEC_DEBUG_BUILD_NEON "neon"
  #else
  #define SQLITE_VEC_DEBUG_BUILD_NEON ""
  #endif

  /* Space-separated list of the fragments above (a lone " " when none is set). */
  #define SQLITE_VEC_DEBUG_BUILD                                               \
    SQLITE_VEC_DEBUG_BUILD_AVX " " SQLITE_VEC_DEBUG_BUILD_NEON

  /* Multi-line text served by vec_debug(): version, date, commit, build flags. */
  #define SQLITE_VEC_DEBUG_STRING                                              \
    "Version: " SQLITE_VEC_VERSION "\n"                                        \
    "Date: " SQLITE_VEC_DATE "\n"                                              \
    "Commit: " SQLITE_VEC_SOURCE "\n"                                          \
    "Build flags: " SQLITE_VEC_DEBUG_BUILD
  8687. SQLITE_VEC_API int sqlite3_vec_init(sqlite3 *db, char **pzErrMsg,
  8688. const sqlite3_api_routines *pApi) {
  8689. #ifndef SQLITE_CORE
  8690. SQLITE_EXTENSION_INIT2(pApi);
  8691. #endif
  8692. int rc = SQLITE_OK;
  8693. #define DEFAULT_FLAGS (SQLITE_UTF8 | SQLITE_INNOCUOUS | SQLITE_DETERMINISTIC)
  8694. rc = sqlite3_create_function_v2(db, "vec_version", 0, DEFAULT_FLAGS,
  8695. SQLITE_VEC_VERSION, _static_text_func, NULL,
  8696. NULL, NULL);
  8697. if (rc != SQLITE_OK) {
  8698. return rc;
  8699. }
  8700. rc = sqlite3_create_function_v2(db, "vec_debug", 0, DEFAULT_FLAGS,
  8701. SQLITE_VEC_DEBUG_STRING, _static_text_func,
  8702. NULL, NULL, NULL);
  8703. if (rc != SQLITE_OK) {
  8704. return rc;
  8705. }
  8706. static struct {
  8707. const char *zFName;
  8708. void (*xFunc)(sqlite3_context *, int, sqlite3_value **);
  8709. int nArg;
  8710. int flags;
  8711. } aFunc[] = {
  8712. // clang-format off
  8713. //{"vec_version", _static_text_func, 0, DEFAULT_FLAGS, (void *) SQLITE_VEC_VERSION },
  8714. //{"vec_debug", _static_text_func, 0, DEFAULT_FLAGS, (void *) SQLITE_VEC_DEBUG_STRING },
  8715. {"vec_distance_l2", vec_distance_l2, 2, DEFAULT_FLAGS | SQLITE_SUBTYPE, },
  8716. {"vec_distance_l1", vec_distance_l1, 2, DEFAULT_FLAGS | SQLITE_SUBTYPE, },
  8717. {"vec_distance_hamming",vec_distance_hamming, 2, DEFAULT_FLAGS | SQLITE_SUBTYPE, },
  8718. {"vec_distance_cosine", vec_distance_cosine, 2, DEFAULT_FLAGS | SQLITE_SUBTYPE, },
  8719. {"vec_length", vec_length, 1, DEFAULT_FLAGS | SQLITE_SUBTYPE, },
  8720. {"vec_type", vec_type, 1, DEFAULT_FLAGS, },
  8721. {"vec_to_json", vec_to_json, 1, DEFAULT_FLAGS | SQLITE_SUBTYPE | SQLITE_RESULT_SUBTYPE, },
  8722. {"vec_add", vec_add, 2, DEFAULT_FLAGS | SQLITE_SUBTYPE | SQLITE_RESULT_SUBTYPE, },
  8723. {"vec_sub", vec_sub, 2, DEFAULT_FLAGS | SQLITE_SUBTYPE | SQLITE_RESULT_SUBTYPE, },
  8724. {"vec_slice", vec_slice, 3, DEFAULT_FLAGS | SQLITE_SUBTYPE | SQLITE_RESULT_SUBTYPE, },
  8725. {"vec_normalize", vec_normalize, 1, DEFAULT_FLAGS | SQLITE_SUBTYPE | SQLITE_RESULT_SUBTYPE, },
  8726. {"vec_f32", vec_f32, 1, DEFAULT_FLAGS | SQLITE_SUBTYPE | SQLITE_RESULT_SUBTYPE, },
  8727. {"vec_bit", vec_bit, 1, DEFAULT_FLAGS | SQLITE_SUBTYPE | SQLITE_RESULT_SUBTYPE, },
  8728. {"vec_int8", vec_int8, 1, DEFAULT_FLAGS | SQLITE_SUBTYPE | SQLITE_RESULT_SUBTYPE, },
  8729. {"vec_quantize_int8", vec_quantize_int8, 2, DEFAULT_FLAGS | SQLITE_SUBTYPE | SQLITE_RESULT_SUBTYPE, },
  8730. {"vec_quantize_binary", vec_quantize_binary, 1, DEFAULT_FLAGS | SQLITE_SUBTYPE | SQLITE_RESULT_SUBTYPE, },
  8731. // clang-format on
  8732. };
  8733. static struct {
  8734. char *name;
  8735. const sqlite3_module *module;
  8736. void *p;
  8737. void (*xDestroy)(void *);
  8738. } aMod[] = {
  8739. // clang-format off
  8740. {"vec0", &vec0Module, NULL, NULL},
  8741. {"vec_each", &vec_eachModule, NULL, NULL},
  8742. // clang-format on
  8743. };
  8744. for (unsigned long i = 0; i < countof(aFunc) && rc == SQLITE_OK; i++) {
  8745. rc = sqlite3_create_function_v2(db, aFunc[i].zFName, aFunc[i].nArg,
  8746. aFunc[i].flags, NULL, aFunc[i].xFunc, NULL,
  8747. NULL, NULL);
  8748. if (rc != SQLITE_OK) {
  8749. *pzErrMsg = sqlite3_mprintf("Error creating function %s: %s",
  8750. aFunc[i].zFName, sqlite3_errmsg(db));
  8751. return rc;
  8752. }
  8753. }
  8754. for (unsigned long i = 0; i < countof(aMod) && rc == SQLITE_OK; i++) {
  8755. rc = sqlite3_create_module_v2(db, aMod[i].name, aMod[i].module, NULL, NULL);
  8756. if (rc != SQLITE_OK) {
  8757. *pzErrMsg = sqlite3_mprintf("Error creating module %s: %s", aMod[i].name,
  8758. sqlite3_errmsg(db));
  8759. return rc;
  8760. }
  8761. }
  8762. return SQLITE_OK;
  8763. }
  8764. #ifndef SQLITE_VEC_OMIT_FS
  8765. SQLITE_VEC_API int sqlite3_vec_numpy_init(sqlite3 *db, char **pzErrMsg,
  8766. const sqlite3_api_routines *pApi) {
  8767. UNUSED_PARAMETER(pzErrMsg);
  8768. #ifndef SQLITE_CORE
  8769. SQLITE_EXTENSION_INIT2(pApi);
  8770. #endif
  8771. int rc = SQLITE_OK;
  8772. rc = sqlite3_create_function_v2(db, "vec_npy_file", 1, SQLITE_RESULT_SUBTYPE,
  8773. NULL, vec_npy_file, NULL, NULL, NULL);
  8774. if(rc != SQLITE_OK) {
  8775. return rc;
  8776. }
  8777. rc = sqlite3_create_module_v2(db, "vec_npy_each", &vec_npy_eachModule, NULL, NULL);
  8778. return rc;
  8779. }
  8780. #endif
  8781. SQLITE_VEC_API int
  8782. sqlite3_vec_static_blobs_init(sqlite3 *db, char **pzErrMsg,
  8783. const sqlite3_api_routines *pApi) {
  8784. UNUSED_PARAMETER(pzErrMsg);
  8785. #ifndef SQLITE_CORE
  8786. SQLITE_EXTENSION_INIT2(pApi);
  8787. #endif
  8788. int rc = SQLITE_OK;
  8789. vec_static_blob_data *static_blob_data;
  8790. static_blob_data = sqlite3_malloc(sizeof(*static_blob_data));
  8791. if (!static_blob_data) {
  8792. return SQLITE_NOMEM;
  8793. }
  8794. memset(static_blob_data, 0, sizeof(*static_blob_data));
  8795. rc = sqlite3_create_function_v2(
  8796. db, "vec_static_blob_from_raw", 4,
  8797. DEFAULT_FLAGS | SQLITE_SUBTYPE | SQLITE_RESULT_SUBTYPE, NULL,
  8798. vec_static_blob_from_raw, NULL, NULL, NULL);
  8799. if (rc != SQLITE_OK)
  8800. return rc;
  8801. rc = sqlite3_create_module_v2(db, "vec_static_blobs", &vec_static_blobsModule,
  8802. static_blob_data, sqlite3_free);
  8803. if (rc != SQLITE_OK)
  8804. return rc;
  8805. rc = sqlite3_create_module_v2(db, "vec_static_blob_entries",
  8806. &vec_static_blob_entriesModule,
  8807. static_blob_data, NULL);
  8808. if (rc != SQLITE_OK)
  8809. return rc;
  8810. return rc;
  8811. }