| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
70370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761
27712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741174217431744174517461747174817491750175117521753175417551756175717581759176017611762176317641765176617671768176917701771177217731774177517761
77717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121212221232124212521262127212821292130213121322133213421352136213721382139214021412142214321442145214621472148214921502151215221532154215521562157215821592160216121622163216421652166216721682169217021712172217321742175217621772178217921802181218221832184218521862187218821892190219121922193219421952196219721982199220022012202220322042205220622072208220922102211221222132214221522162217221822192220222122222223222422252226222722282229223022312232223322342235223622372238223922402241224222432244224522462247224822492250225122522253225422552256225722582259226022612262226322642265226622672268226922702271227222732274227522762
27722782279228022812282228322842285228622872288228922902291229222932294229522962297229822992300230123022303230423052306230723082309231023112312231323142315231623172318231923202321232223232324232523262327232823292330233123322333233423352336233723382339234023412342234323442345234623472348234923502351235223532354235523562357235823592360236123622363236423652366236723682369237023712372237323742375237623772378237923802381238223832384238523862387238823892390239123922393239423952396239723982399240024012402240324042405240624072408240924102411241224132414241524162417241824192420242124222423242424252426242724282429243024312432243324342435243624372438243924402441244224432444244524462447244824492450245124522453245424552456245724582459246024612462246324642465246624672468246924702471247224732474247524762477247824792480248124822483248424852486248724882489249024912492249324942495249624972498249925002501250225032504250525062507250825092510251125122513251425152516251725182519252025212522252325242525252625272528252925302531253225332534253525362537253825392540254125422543254425452546254725482549255025512552255325542555255625572558255925602561256225632564256525662567256825692570257125722573257425752576257725782579258025812582258325842585258625872588258925902591259225932594259525962597259825992600260126022603260426052606260726082609261026112612261326142615261626172618261926202621262226232624262526262627262826292630263126322633263426352636263726382639264026412642264326442645264626472648264926502651265226532654265526562657265826592660266126622663266426652666266726682669267026712672267326742675267626772678267926802681268226832684268526862687268826892690269126922693269426952696269726982699270027012702270327042705270627072708270927102711271227132714271527162717271827192720272127222723272427252726272727282729273027312732273327342735273627372738273927402741274227432744274527462747274827492750275127522753275427552756275727582759276027612762276327642765276627672768276927702771277227732774277527762
77727782779278027812782278327842785278627872788278927902791279227932794279527962797279827992800280128022803280428052806280728082809281028112812281328142815281628172818281928202821282228232824282528262827282828292830283128322833283428352836283728382839284028412842284328442845284628472848284928502851285228532854285528562857285828592860286128622863286428652866286728682869287028712872287328742875287628772878287928802881288228832884288528862887288828892890289128922893289428952896289728982899290029012902290329042905290629072908290929102911291229132914291529162917291829192920292129222923292429252926292729282929293029312932293329342935293629372938293929402941294229432944294529462947294829492950295129522953295429552956295729582959296029612962296329642965296629672968296929702971297229732974297529762977297829792980298129822983298429852986298729882989299029912992299329942995299629972998299930003001300230033004300530063007300830093010301130123013301430153016301730183019302030213022302330243025302630273028302930303031303230333034303530363037303830393040304130423043304430453046304730483049305030513052305330543055305630573058305930603061306230633064306530663067306830693070307130723073307430753076307730783079308030813082308330843085308630873088308930903091309230933094309530963097309830993100310131023103310431053106310731083109311031113112311331143115311631173118311931203121312231233124312531263127312831293130313131323133313431353136313731383139314031413142314331443145314631473148314931503151315231533154315531563157315831593160316131623163316431653166316731683169317031713172317331743175317631773178317931803181318231833184318531863187318831893190319131923193319431953196319731983199320032013202320332043205320632073208320932103211321232133214321532163217321832193220322132223223322432253226322732283229323032313232323332343235323632373238323932403241324232433244324532463247324832493250325132523253325432553256325732583259326032613262326332643265326632673268326932703271327232733274327532763
27732783279328032813282328332843285328632873288328932903291329232933294329532963297329832993300330133023303330433053306330733083309331033113312331333143315331633173318331933203321332233233324332533263327332833293330333133323333333433353336333733383339334033413342334333443345334633473348334933503351335233533354335533563357335833593360336133623363336433653366336733683369337033713372337333743375337633773378337933803381338233833384338533863387338833893390339133923393339433953396339733983399340034013402340334043405340634073408340934103411341234133414341534163417341834193420342134223423342434253426342734283429343034313432343334343435343634373438343934403441344234433444344534463447344834493450345134523453345434553456345734583459346034613462346334643465346634673468346934703471347234733474347534763477347834793480348134823483348434853486348734883489349034913492349334943495349634973498349935003501350235033504350535063507350835093510351135123513351435153516351735183519352035213522352335243525352635273528352935303531353235333534353535363537353835393540354135423543354435453546354735483549355035513552355335543555355635573558355935603561356235633564356535663567356835693570357135723573357435753576357735783579358035813582358335843585358635873588358935903591359235933594359535963597359835993600360136023603360436053606360736083609361036113612361336143615361636173618361936203621362236233624362536263627362836293630363136323633363436353636363736383639364036413642364336443645364636473648364936503651365236533654365536563657365836593660366136623663366436653666366736683669367036713672367336743675367636773678367936803681368236833684368536863687368836893690369136923693369436953696369736983699370037013702370337043705370637073708370937103711371237133714371537163717371837193720372137223723372437253726372737283729373037313732373337343735373637373738373937403741374237433744374537463747374837493750375137523753375437553756375737583759376037613762376337643765376637673768376937703771377237733774377537763
77737783779378037813782378337843785378637873788378937903791379237933794379537963797379837993800380138023803380438053806380738083809381038113812381338143815381638173818381938203821382238233824382538263827382838293830383138323833383438353836383738383839384038413842384338443845384638473848384938503851385238533854385538563857385838593860386138623863386438653866386738683869387038713872387338743875387638773878387938803881388238833884388538863887388838893890389138923893389438953896389738983899390039013902390339043905390639073908390939103911391239133914391539163917391839193920392139223923392439253926392739283929393039313932393339343935393639373938393939403941394239433944394539463947394839493950395139523953395439553956395739583959396039613962396339643965396639673968396939703971397239733974397539763977397839793980398139823983398439853986398739883989399039913992399339943995399639973998399940004001400240034004400540064007400840094010401140124013401440154016401740184019402040214022402340244025402640274028402940304031403240334034403540364037403840394040404140424043404440454046404740484049405040514052405340544055405640574058405940604061406240634064406540664067406840694070407140724073407440754076407740784079408040814082408340844085408640874088408940904091409240934094409540964097409840994100410141024103410441054106410741084109411041114112411341144115411641174118411941204121412241234124412541264127412841294130413141324133413441354136413741384139414041414142414341444145414641474148414941504151415241534154415541564157415841594160416141624163416441654166416741684169417041714172417341744175417641774178417941804181418241834184418541864187418841894190419141924193419441954196419741984199420042014202420342044205420642074208420942104211421242134214421542164217421842194220422142224223422442254226422742284229423042314232423342344235423642374238423942404241424242434244424542464247424842494250425142524253425442554256425742584259426042614262426342644265426642674268426942704271427242734274427542764
27742784279428042814282428342844285428642874288428942904291429242934294429542964297429842994300430143024303430443054306430743084309431043114312431343144315431643174318431943204321432243234324432543264327432843294330433143324333433443354336433743384339434043414342434343444345434643474348434943504351435243534354435543564357435843594360436143624363436443654366436743684369437043714372437343744375437643774378437943804381438243834384438543864387438843894390439143924393439443954396439743984399440044014402440344044405440644074408440944104411441244134414441544164417441844194420442144224423442444254426442744284429443044314432443344344435443644374438443944404441444244434444444544464447444844494450445144524453445444554456445744584459446044614462446344644465446644674468446944704471447244734474447544764477447844794480448144824483448444854486448744884489449044914492449344944495449644974498449945004501450245034504450545064507450845094510451145124513451445154516451745184519452045214522452345244525452645274528452945304531453245334534453545364537453845394540454145424543454445454546454745484549455045514552455345544555455645574558455945604561456245634564456545664567456845694570457145724573457445754576457745784579458045814582458345844585458645874588458945904591459245934594459545964597459845994600460146024603460446054606460746084609461046114612461346144615461646174618461946204621462246234624462546264627462846294630463146324633463446354636463746384639464046414642464346444645464646474648464946504651465246534654465546564657465846594660466146624663466446654666466746684669467046714672467346744675467646774678467946804681468246834684468546864687468846894690469146924693469446954696469746984699470047014702470347044705470647074708470947104711471247134714471547164717471847194720472147224723472447254726472747284729473047314732473347344735473647374738473947404741474247434744474547464747474847494750475147524753475447554756475747584759476047614762476347644765476647674768476947704771477247734774477547764
77747784779478047814782478347844785478647874788478947904791479247934794479547964797479847994800480148024803480448054806480748084809481048114812481348144815481648174818481948204821482248234824482548264827482848294830483148324833483448354836483748384839484048414842484348444845484648474848484948504851485248534854485548564857485848594860486148624863486448654866486748684869487048714872487348744875487648774878487948804881488248834884488548864887488848894890489148924893489448954896489748984899490049014902490349044905490649074908490949104911491249134914491549164917491849194920492149224923492449254926492749284929493049314932493349344935493649374938493949404941494249434944494549464947494849494950495149524953495449554956495749584959496049614962496349644965496649674968496949704971497249734974497549764977497849794980498149824983498449854986498749884989499049914992499349944995499649974998499950005001500250035004500550065007500850095010501150125013501450155016501750185019502050215022502350245025502650275028502950305031503250335034503550365037503850395040504150425043504450455046504750485049505050515052505350545055505650575058505950605061506250635064506550665067506850695070507150725073507450755076507750785079508050815082508350845085508650875088508950905091509250935094509550965097509850995100510151025103510451055106510751085109511051115112511351145115511651175118511951205121512251235124512551265127512851295130513151325133513451355136513751385139514051415142514351445145514651475148514951505151515251535154515551565157515851595160516151625163516451655166516751685169517051715172517351745175517651775178517951805181518251835184518551865187518851895190519151925193519451955196519751985199520052015202520352045205520652075208520952105211521252135214521552165217521852195220522152225223522452255226522752285229523052315232523352345235523652375238523952405241524252435244524552465247524852495250525152525253525452555256525752585259526052615262526352645265526652675268526952705271527252735274527552765
27752785279528052815282528352845285528652875288528952905291529252935294529552965297529852995300530153025303530453055306530753085309531053115312531353145315531653175318531953205321532253235324532553265327532853295330533153325333533453355336533753385339534053415342534353445345534653475348534953505351535253535354535553565357535853595360536153625363536453655366536753685369537053715372537353745375537653775378537953805381538253835384538553865387538853895390539153925393539453955396539753985399540054015402540354045405540654075408540954105411541254135414541554165417541854195420542154225423542454255426542754285429543054315432543354345435543654375438543954405441544254435444544554465447544854495450545154525453545454555456545754585459546054615462546354645465546654675468546954705471547254735474547554765477547854795480548154825483548454855486548754885489549054915492549354945495549654975498549955005501550255035504550555065507550855095510551155125513551455155516551755185519552055215522552355245525552655275528552955305531553255335534553555365537553855395540554155425543554455455546554755485549555055515552555355545555555655575558555955605561556255635564556555665567556855695570557155725573557455755576557755785579558055815582558355845585558655875588558955905591559255935594559555965597559855995600560156025603560456055606560756085609561056115612561356145615561656175618561956205621562256235624562556265627562856295630563156325633563456355636563756385639564056415642564356445645564656475648564956505651565256535654565556565657565856595660566156625663566456655666566756685669567056715672567356745675567656775678567956805681568256835684568556865687568856895690569156925693569456955696569756985699570057015702570357045705570657075708570957105711571257135714571557165717571857195720572157225723572457255726572757285729573057315732573357345735573657375738573957405741574257435744574557465747574857495750575157525753575457555756575757585759576057615762576357645765576657675768576957705771577257735774577557765
77757785779578057815782578357845785578657875788578957905791579257935794579557965797579857995800580158025803580458055806580758085809581058115812581358145815581658175818581958205821582258235824582558265827582858295830583158325833583458355836583758385839584058415842584358445845584658475848584958505851585258535854585558565857585858595860586158625863586458655866586758685869587058715872587358745875587658775878587958805881588258835884588558865887588858895890589158925893589458955896589758985899590059015902590359045905590659075908590959105911591259135914591559165917591859195920592159225923592459255926592759285929593059315932593359345935593659375938593959405941594259435944594559465947594859495950595159525953595459555956595759585959596059615962596359645965596659675968596959705971597259735974597559765977597859795980598159825983598459855986598759885989599059915992599359945995599659975998599960006001600260036004600560066007600860096010601160126013601460156016601760186019602060216022602360246025602660276028602960306031603260336034603560366037603860396040604160426043604460456046604760486049605060516052605360546055605660576058605960606061606260636064606560666067606860696070607160726073607460756076607760786079608060816082608360846085608660876088608960906091609260936094609560966097609860996100610161026103610461056106610761086109611061116112611361146115611661176118611961206121612261236124612561266127612861296130613161326133613461356136613761386139614061416142614361446145614661476148614961506151615261536154615561566157615861596160616161626163616461656166616761686169617061716172617361746175617661776178617961806181618261836184618561866187618861896190619161926193619461956196619761986199620062016202620362046205620662076208620962106211621262136214621562166217621862196220622162226223622462256226622762286229623062316232623362346235623662376238623962406241624262436244624562466247624862496250625162526253625462556256625762586259626062616262626362646265626662676268626962706271627262736274627562766
27762786279628062816282628362846285628662876288628962906291629262936294629562966297629862996300630163026303630463056306630763086309631063116312631363146315631663176318631963206321632263236324632563266327632863296330633163326333633463356336633763386339634063416342634363446345634663476348634963506351635263536354635563566357635863596360636163626363636463656366636763686369637063716372637363746375637663776378637963806381638263836384638563866387638863896390639163926393639463956396639763986399640064016402640364046405640664076408640964106411641264136414641564166417641864196420642164226423642464256426642764286429643064316432643364346435643664376438643964406441644264436444644564466447644864496450645164526453645464556456645764586459646064616462646364646465646664676468646964706471647264736474647564766477647864796480648164826483648464856486648764886489649064916492649364946495649664976498649965006501650265036504650565066507650865096510651165126513651465156516651765186519652065216522652365246525652665276528652965306531653265336534653565366537653865396540654165426543654465456546654765486549655065516552655365546555655665576558655965606561656265636564656565666567656865696570657165726573657465756576657765786579658065816582658365846585658665876588658965906591659265936594659565966597659865996600660166026603660466056606660766086609661066116612661366146615661666176618661966206621662266236624662566266627662866296630663166326633663466356636663766386639664066416642664366446645664666476648664966506651665266536654665566566657665866596660666166626663666466656666666766686669667066716672667366746675667666776678667966806681668266836684668566866687668866896690669166926693669466956696669766986699670067016702670367046705670667076708670967106711671267136714671567166717671867196720672167226723672467256726672767286729673067316732673367346735673667376738673967406741674267436744674567466747674867496750675167526753675467556756675767586759676067616762676367646765676667676768676967706771677267736774677567766
77767786779678067816782678367846785678667876788678967906791679267936794679567966797679867996800680168026803680468056806680768086809681068116812681368146815681668176818681968206821682268236824682568266827682868296830683168326833683468356836683768386839684068416842684368446845684668476848684968506851685268536854685568566857685868596860686168626863686468656866686768686869687068716872687368746875687668776878687968806881688268836884688568866887688868896890689168926893689468956896689768986899690069016902690369046905690669076908690969106911691269136914691569166917691869196920692169226923692469256926692769286929693069316932693369346935693669376938693969406941694269436944694569466947694869496950695169526953695469556956695769586959696069616962696369646965696669676968696969706971697269736974697569766977697869796980698169826983698469856986698769886989699069916992699369946995699669976998699970007001700270037004700570067007700870097010701170127013701470157016701770187019702070217022702370247025702670277028702970307031703270337034703570367037703870397040704170427043704470457046704770487049705070517052705370547055705670577058705970607061706270637064706570667067706870697070707170727073707470757076707770787079708070817082708370847085708670877088708970907091709270937094709570967097709870997100710171027103710471057106710771087109711071117112711371147115711671177118711971207121712271237124712571267127712871297130713171327133713471357136713771387139714071417142714371447145714671477148714971507151715271537154715571567157715871597160716171627163716471657166716771687169717071717172717371747175717671777178717971807181718271837184718571867187718871897190719171927193719471957196719771987199720072017202720372047205720672077208720972107211721272137214721572167217721872197220722172227223722472257226722772287229723072317232723372347235723672377238723972407241724272437244724572467247724872497250725172527253725472557256725772587259726072617262726372647265726672677268726972707271727272737274727572767
27772787279728072817282728372847285728672877288728972907291729272937294729572967297729872997300730173027303730473057306730773087309731073117312731373147315731673177318731973207321732273237324732573267327732873297330733173327333733473357336733773387339734073417342734373447345734673477348734973507351735273537354735573567357735873597360736173627363736473657366736773687369737073717372737373747375737673777378737973807381738273837384738573867387738873897390739173927393739473957396739773987399740074017402740374047405740674077408740974107411741274137414741574167417741874197420742174227423742474257426742774287429743074317432743374347435743674377438743974407441744274437444744574467447744874497450745174527453745474557456745774587459746074617462746374647465746674677468746974707471747274737474747574767477747874797480748174827483748474857486748774887489749074917492749374947495749674977498749975007501750275037504750575067507750875097510751175127513751475157516751775187519752075217522752375247525752675277528752975307531753275337534753575367537753875397540754175427543754475457546754775487549755075517552755375547555755675577558755975607561756275637564756575667567756875697570757175727573757475757576757775787579758075817582758375847585758675877588758975907591759275937594759575967597759875997600760176027603760476057606760776087609761076117612761376147615761676177618761976207621762276237624762576267627762876297630763176327633763476357636763776387639764076417642764376447645764676477648764976507651765276537654765576567657765876597660766176627663766476657666766776687669767076717672767376747675767676777678767976807681768276837684768576867687768876897690769176927693769476957696769776987699770077017702770377047705770677077708770977107711771277137714771577167717771877197720772177227723772477257726772777287729773077317732773377347735773677377738773977407741774277437744774577467747774877497750775177527753775477557756775777587759776077617762776377647765776677677768776977707771777277737774777577767
77777787779778077817782778377847785778677877788778977907791779277937794779577967797779877997800780178027803780478057806780778087809781078117812781378147815781678177818781978207821782278237824782578267827782878297830783178327833783478357836783778387839784078417842784378447845784678477848784978507851785278537854785578567857785878597860786178627863786478657866786778687869787078717872787378747875787678777878787978807881788278837884788578867887788878897890789178927893789478957896789778987899790079017902790379047905790679077908790979107911791279137914791579167917791879197920792179227923792479257926792779287929793079317932793379347935793679377938793979407941794279437944794579467947794879497950795179527953795479557956795779587959796079617962796379647965796679677968796979707971797279737974797579767977797879797980798179827983798479857986798779887989799079917992799379947995799679977998799980008001800280038004800580068007800880098010801180128013801480158016801780188019802080218022802380248025802680278028802980308031803280338034803580368037803880398040804180428043804480458046804780488049805080518052805380548055805680578058805980608061806280638064806580668067806880698070807180728073807480758076807780788079808080818082808380848085808680878088808980908091809280938094809580968097809880998100810181028103810481058106810781088109811081118112811381148115811681178118811981208121812281238124812581268127812881298130813181328133813481358136813781388139814081418142814381448145814681478148814981508151815281538154815581568157815881598160816181628163816481658166816781688169817081718172817381748175817681778178817981808181818281838184818581868187818881898190819181928193819481958196819781988199820082018202820382048205820682078208820982108211821282138214821582168217821882198220822182228223822482258226822782288229823082318232823382348235823682378238823982408241824282438244824582468247824882498250825182528253825482558256825782588259826082618262826382648265826682678268826982708271827282738274827582768
27782788279828082818282828382848285828682878288828982908291829282938294829582968297829882998300830183028303830483058306830783088309831083118312831383148315831683178318831983208321832283238324832583268327832883298330833183328333833483358336833783388339834083418342834383448345834683478348834983508351835283538354835583568357835883598360836183628363836483658366836783688369837083718372837383748375837683778378837983808381838283838384838583868387838883898390839183928393839483958396839783988399840084018402840384048405840684078408840984108411841284138414841584168417841884198420842184228423842484258426842784288429843084318432843384348435843684378438843984408441844284438444844584468447844884498450845184528453845484558456845784588459846084618462846384648465846684678468846984708471847284738474847584768477847884798480848184828483848484858486848784888489849084918492849384948495849684978498849985008501850285038504850585068507850885098510851185128513851485158516851785188519852085218522852385248525852685278528852985308531853285338534853585368537853885398540854185428543854485458546854785488549855085518552855385548555855685578558855985608561856285638564856585668567856885698570857185728573857485758576857785788579858085818582858385848585858685878588858985908591859285938594859585968597859885998600860186028603860486058606860786088609861086118612861386148615861686178618861986208621862286238624862586268627862886298630863186328633863486358636863786388639864086418642864386448645864686478648864986508651865286538654865586568657865886598660866186628663866486658666866786688669867086718672867386748675867686778678867986808681868286838684868586868687868886898690869186928693869486958696869786988699870087018702870387048705870687078708870987108711871287138714871587168717871887198720872187228723872487258726872787288729873087318732873387348735873687378738873987408741874287438744874587468747874887498750875187528753875487558756875787588759876087618762876387648765876687678768876987708771877287738774877587768
77787788779878087818782878387848785878687878788878987908791879287938794879587968797879887998800880188028803880488058806880788088809881088118812881388148815881688178818881988208821882288238824882588268827882888298830883188328833883488358836883788388839884088418842884388448845884688478848884988508851885288538854885588568857885888598860886188628863886488658866886788688869887088718872887388748875887688778878887988808881888288838884888588868887888888898890889188928893889488958896889788988899890089018902890389048905890689078908890989108911891289138914891589168917891889198920892189228923892489258926892789288929893089318932893389348935893689378938893989408941894289438944894589468947894889498950895189528953895489558956895789588959896089618962896389648965896689678968896989708971897289738974897589768977897889798980898189828983898489858986898789888989899089918992899389948995899689978998899990009001900290039004900590069007900890099010901190129013901490159016901790189019902090219022902390249025902690279028902990309031903290339034903590369037903890399040904190429043904490459046904790489049905090519052905390549055905690579058905990609061906290639064906590669067906890699070907190729073907490759076907790789079908090819082908390849085908690879088908990909091909290939094909590969097909890999100910191029103910491059106910791089109911091119112911391149115911691179118911991209121912291239124912591269127912891299130913191329133913491359136913791389139914091419142914391449145914691479148914991509151915291539154915591569157915891599160916191629163916491659166916791689169917091719172917391749175917691779178917991809181918291839184918591869187918891899190919191929193919491959196919791989199920092019202920392049205920692079208920992109211921292139214921592169217921892199220922192229223922492259226922792289229923092319232923392349235923692379238923992409241924292439244924592469247924892499250925192529253925492559256925792589259926092619262926392649265926692679268926992709271927292739274927592769
277927892799280928192829283928492859286928792889289929092919292929392949295929692979298929993009301930293039304930593069307930893099310931193129313931493159316931793189319932093219322932393249325932693279328932993309331933293339334933593369337933893399340934193429343934493459346934793489349935093519352935393549355935693579358935993609361936293639364936593669367936893699370937193729373937493759376937793789379938093819382938393849385938693879388938993909391939293939394939593969397939893999400940194029403940494059406940794089409941094119412941394149415941694179418941994209421942294239424942594269427942894299430943194329433943494359436943794389439944094419442944394449445944694479448944994509451945294539454945594569457945894599460946194629463946494659466946794689469947094719472947394749475947694779478947994809481948294839484948594869487948894899490949194929493949494959496949794989499950095019502950395049505950695079508950995109511951295139514951595169517951895199520952195229523952495259526952795289529953095319532953395349535953695379538953995409541954295439544954595469547954895499550955195529553955495559556955795589559956095619562956395649565956695679568956995709571957295739574957595769577957895799580958195829583958495859586958795889589959095919592959395949595959695979598959996009601960296039604960596069607960896099610961196129613961496159616961796189619962096219622962396249625962696279628962996309631963296339634963596369637963896399640964196429643964496459646964796489649965096519652965396549655965696579658965996609661966296639664966596669667966896699670967196729673967496759676967796789679968096819682968396849685968696879688968996909691969296939694969596969697969896999700970197029703970497059706970797089709971097119712971397149715971697179718971997209721972297239724972597269727972897299730973197329733973497359736973797389739974097419742974397449745974697479748974997509751975297539754975597569757 |
- #include "sqlite-vec.h"
- #include <assert.h>
- #include <errno.h>
- #include <float.h>
- #include <inttypes.h>
- #include <limits.h>
- #include <math.h>
- #include <stdbool.h>
- #include <stdint.h>
- #include <stdlib.h>
- #include <string.h>
- #ifndef SQLITE_VEC_OMIT_FS
- #include <stdio.h>
- #endif
- #ifndef SQLITE_CORE
- #include "sqlite3ext.h"
- SQLITE_EXTENSION_INIT1
- #else
- #include "sqlite3.h"
- #endif
- #ifndef UINT32_TYPE
- #ifdef HAVE_UINT32_T
- #define UINT32_TYPE uint32_t
- #else
- #define UINT32_TYPE unsigned int
- #endif
- #endif
- #ifndef UINT16_TYPE
- #ifdef HAVE_UINT16_T
- #define UINT16_TYPE uint16_t
- #else
- #define UINT16_TYPE unsigned short int
- #endif
- #endif
- #ifndef INT16_TYPE
- #ifdef HAVE_INT16_T
- #define INT16_TYPE int16_t
- #else
- #define INT16_TYPE short int
- #endif
- #endif
- #ifndef UINT8_TYPE
- #ifdef HAVE_UINT8_T
- #define UINT8_TYPE uint8_t
- #else
- #define UINT8_TYPE unsigned char
- #endif
- #endif
- #ifndef INT8_TYPE
- #ifdef HAVE_INT8_T
- #define INT8_TYPE int8_t
- #else
- #define INT8_TYPE signed char
- #endif
- #endif
- #ifndef LONGDOUBLE_TYPE
- #define LONGDOUBLE_TYPE long double
- #endif
- #ifndef _WIN32
- #ifndef __EMSCRIPTEN__
- #ifndef __COSMOPOLITAN__
- #ifndef __wasi__
- typedef u_int8_t uint8_t;
- typedef u_int16_t uint16_t;
- typedef u_int64_t uint64_t;
- #endif
- #endif
- #endif
- #endif
- typedef int8_t i8;
- typedef uint8_t u8;
- typedef int16_t i16;
- typedef int32_t i32;
- typedef sqlite3_int64 i64;
- typedef uint32_t u32;
- typedef uint64_t u64;
- typedef float f32;
- typedef size_t usize;
- #ifndef UNUSED_PARAMETER
- #define UNUSED_PARAMETER(X) (void)(X)
- #endif
- // sqlite3_vtab_in() was added in SQLite version 3.38 (2022-02-22)
- // https://www.sqlite.org/changes.html#version_3_38_0
- #if SQLITE_VERSION_NUMBER >= 3038000
- #define COMPILER_SUPPORTS_VTAB_IN 1
- #endif
- #ifndef SQLITE_SUBTYPE
- #define SQLITE_SUBTYPE 0x000100000
- #endif
- #ifndef SQLITE_RESULT_SUBTYPE
- #define SQLITE_RESULT_SUBTYPE 0x001000000
- #endif
- #ifndef SQLITE_INDEX_CONSTRAINT_LIMIT
- #define SQLITE_INDEX_CONSTRAINT_LIMIT 73
- #endif
- #ifndef SQLITE_INDEX_CONSTRAINT_OFFSET
- #define SQLITE_INDEX_CONSTRAINT_OFFSET 74
- #endif
- #define countof(x) (sizeof(x) / sizeof((x)[0]))
- #define min(a, b) (((a) <= (b)) ? (a) : (b))
/*
 * Element type of a vector. Elsewhere in this file the same numeric values
 * are used as the SQLite value subtype attached to vector results and are
 * compared against sqlite3_value_subtype() when reading vectors back.
 */
enum VectorElementType {
  SQLITE_VEC_ELEMENT_TYPE_FLOAT32 = 223, /* 223 + 0 */
  SQLITE_VEC_ELEMENT_TYPE_BIT = 224,     /* 223 + 1 */
  SQLITE_VEC_ELEMENT_TYPE_INT8 = 225,    /* 223 + 2 */
};
- #ifdef SQLITE_VEC_ENABLE_AVX
- #include <immintrin.h>
- #define PORTABLE_ALIGN32 __attribute__((aligned(32)))
- #define PORTABLE_ALIGN64 __attribute__((aligned(64)))
- static f32 l2_sqr_float_avx(const void *pVect1v, const void *pVect2v,
- const void *qty_ptr) {
- f32 *pVect1 = (f32 *)pVect1v;
- f32 *pVect2 = (f32 *)pVect2v;
- size_t qty = *((size_t *)qty_ptr);
- f32 PORTABLE_ALIGN32 TmpRes[8];
- size_t qty16 = qty >> 4;
- const f32 *pEnd1 = pVect1 + (qty16 << 4);
- __m256 diff, v1, v2;
- __m256 sum = _mm256_set1_ps(0);
- while (pVect1 < pEnd1) {
- v1 = _mm256_loadu_ps(pVect1);
- pVect1 += 8;
- v2 = _mm256_loadu_ps(pVect2);
- pVect2 += 8;
- diff = _mm256_sub_ps(v1, v2);
- sum = _mm256_add_ps(sum, _mm256_mul_ps(diff, diff));
- v1 = _mm256_loadu_ps(pVect1);
- pVect1 += 8;
- v2 = _mm256_loadu_ps(pVect2);
- pVect2 += 8;
- diff = _mm256_sub_ps(v1, v2);
- sum = _mm256_add_ps(sum, _mm256_mul_ps(diff, diff));
- }
- _mm256_store_ps(TmpRes, sum);
- return sqrt(TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3] + TmpRes[4] +
- TmpRes[5] + TmpRes[6] + TmpRes[7]);
- }
- #endif
- #ifdef SQLITE_VEC_ENABLE_NEON
- #include <arm_neon.h>
- #define PORTABLE_ALIGN32 __attribute__((aligned(32)))
- // thx https://github.com/nmslib/hnswlib/pull/299/files
/*
 * NEON kernel: Euclidean (L2) distance between two float32 vectors.
 *
 * qty_ptr points to a size_t element count. Sixteen floats are consumed per
 * outer iteration using four independent 4-lane accumulators; the remaining
 * (count % 16) elements are handled by a scalar loop. The square root of the
 * accumulated sum of squared differences is returned.
 */
static f32 l2_sqr_float_neon(const void *pVect1v, const void *pVect2v,
                             const void *qty_ptr) {
  f32 *lhs = (f32 *)pVect1v;
  f32 *rhs = (f32 *)pVect2v;
  const size_t count = *((size_t *)qty_ptr);
  const size_t vectorized = (count >> 4) << 4;
  const f32 *const simd_end = lhs + vectorized;

  float32x4_t acc[4] = {vdupq_n_f32(0), vdupq_n_f32(0), vdupq_n_f32(0),
                        vdupq_n_f32(0)};

  while (lhs < simd_end) {
    // One 4-float step per accumulator, in accumulator order 0..3.
    for (int k = 0; k < 4; k++) {
      float32x4_t x = vld1q_f32(lhs);
      float32x4_t y = vld1q_f32(rhs);
      float32x4_t delta = vsubq_f32(x, y);
      acc[k] = vfmaq_f32(acc[k], delta, delta);
      lhs += 4;
      rhs += 4;
    }
  }

  f32 total = vaddvq_f32(
      vaddq_f32(vaddq_f32(acc[0], acc[1]), vaddq_f32(acc[2], acc[3])));

  const f32 *const tail_end = lhs + (count - vectorized);
  for (; lhs < tail_end; lhs++, rhs++) {
    f32 delta = *lhs - *rhs;
    total += delta * delta;
  }
  return sqrt(total);
}
/*
 * NEON kernel: Euclidean (L2) distance between two int8 vectors.
 *
 * qty_ptr points to a size_t element count. Eight elements are processed per
 * iteration; leftovers go through a scalar loop.
 *
 * The difference of two int8 values lies in [-255, 255], so its square can
 * reach 65025, which does not fit in int16. The squares are therefore formed
 * with a widening multiply (int16 x int16 -> int32) instead of a 16-bit
 * multiply, which would wrap for |diff| > 181.
 *
 * Loop bounds are tracked as a remaining-element count so no pointer is ever
 * formed before the start of the input.
 */
static f32 l2_sqr_int8_neon(const void *pVect1v, const void *pVect2v,
                            const void *qty_ptr) {
  const i8 *lhs = (const i8 *)pVect1v;
  const i8 *rhs = (const i8 *)pVect2v;
  size_t remaining = *((const size_t *)qty_ptr);
  i32 sum_scalar = 0;

  while (remaining >= 8) {
    // Widening subtract: int8x8 - int8x8 -> int16x8, no overflow possible.
    int16x8_t diff = vsubl_s8(vld1_s8(lhs), vld1_s8(rhs));
    int16x4_t diff_lo = vget_low_s16(diff);
    int16x4_t diff_hi = vget_high_s16(diff);
    // Widening multiply keeps the full 32-bit square of each lane.
    int32x4_t squares =
        vaddq_s32(vmull_s16(diff_lo, diff_lo), vmull_s16(diff_hi, diff_hi));
    sum_scalar += vaddvq_s32(squares);
    lhs += 8;
    rhs += 8;
    remaining -= 8;
  }

  // handle leftovers
  while (remaining > 0) {
    i32 diff = (i32)*lhs - (i32)*rhs;
    sum_scalar += diff * diff;
    lhs++;
    rhs++;
    remaining--;
  }
  return sqrtf(sum_scalar);
}
/*
 * Adds the sum of |x[i] - y[i]| for 16 int8 lanes into a 4-lane u32
 * accumulator. The absolute difference of two int8 values can be as large as
 * 255, so the result of vabdq_s8 is explicitly reinterpreted as unsigned
 * before the pairwise widening adds.
 */
static inline uint32x4_t l1_int8_neon_block16(uint32x4_t acc, const i8 *x,
                                              const i8 *y) {
  const uint8x16_t abs_diff =
      vreinterpretq_u8_s8(vabdq_s8(vld1q_s8(x), vld1q_s8(y)));
  return vaddq_u32(acc, vpaddlq_u16(vpaddlq_u8(abs_diff)));
}

/*
 * NEON kernel: L1 (Manhattan) distance between two int8 vectors.
 *
 * qty_ptr points to a size_t element count. The input is consumed in blocks
 * of 64, then 16, then one element at a time. All vector type changes are
 * explicit, so the function does not depend on lax vector conversions, and
 * the loop bounds are expressed as a remaining-element count so no pointer
 * before the start of the input is ever computed.
 */
static i32 l1_int8_neon(const void *pVect1v, const void *pVect2v,
                        const void *qty_ptr) {
  const i8 *lhs = (const i8 *)pVect1v;
  const i8 *rhs = (const i8 *)pVect2v;
  size_t remaining = *((const size_t *)qty_ptr);

  uint32x4_t acc1 = vdupq_n_u32(0);
  uint32x4_t acc2 = vdupq_n_u32(0);
  uint32x4_t acc3 = vdupq_n_u32(0);
  uint32x4_t acc4 = vdupq_n_u32(0);

  while (remaining >= 64) {
    acc1 = l1_int8_neon_block16(acc1, lhs, rhs);
    acc2 = l1_int8_neon_block16(acc2, lhs + 16, rhs + 16);
    acc3 = l1_int8_neon_block16(acc3, lhs + 32, rhs + 32);
    acc4 = l1_int8_neon_block16(acc4, lhs + 48, rhs + 48);
    lhs += 64;
    rhs += 64;
    remaining -= 64;
  }

  while (remaining >= 16) {
    acc1 = l1_int8_neon_block16(acc1, lhs, rhs);
    lhs += 16;
    rhs += 16;
    remaining -= 16;
  }

  const uint32x4_t acc =
      vaddq_u32(vaddq_u32(acc1, acc2), vaddq_u32(acc3, acc4));

  i32 tail = 0;
  while (remaining > 0) {
    tail += abs((i32)*lhs - (i32)*rhs);
    lhs++;
    rhs++;
    remaining--;
  }
  return (i32)vaddvq_u32(acc) + tail;
}
/*
 * NEON kernel: L1 (Manhattan) distance between two float32 vectors.
 *
 * qty_ptr points to a size_t element count. Four floats are processed per
 * iteration; each half is widened to float64 before the absolute difference
 * is taken and accumulated. Remaining elements are summed in a scalar loop.
 */
static double l1_f32_neon(const void *pVect1v, const void *pVect2v,
                          const void *qty_ptr) {
  f32 *lhs = (f32 *)pVect1v;
  f32 *rhs = (f32 *)pVect2v;
  const size_t count = *((size_t *)qty_ptr);
  const f32 *const end = lhs + count;

  float64x2_t wide_acc = vdupq_n_f64(0);
  for (; lhs < end - 3; lhs += 4, rhs += 4) {
    const float32x4_t x = vld1q_f32(lhs);
    const float32x4_t y = vld1q_f32(rhs);
    // f32x4 -> 2 x f64x2
    const float64x2_t lo = vabdq_f64(vcvt_f64_f32(vget_low_f32(x)),
                                     vcvt_f64_f32(vget_low_f32(y)));
    const float64x2_t hi =
        vabdq_f64(vcvt_high_f64_f32(x), vcvt_high_f64_f32(y));
    wide_acc = vaddq_f64(wide_acc, vaddq_f64(lo, hi));
  }

  double tail = 0;
  for (; lhs < end; lhs++, rhs++) {
    tail += fabs((double)*lhs - (double)*rhs);
  }
  return vaddvq_f64(wide_acc) + tail;
}
- #endif
- static f32 l2_sqr_float(const void *pVect1v, const void *pVect2v,
- const void *qty_ptr) {
- f32 *pVect1 = (f32 *)pVect1v;
- f32 *pVect2 = (f32 *)pVect2v;
- size_t qty = *((size_t *)qty_ptr);
- f32 res = 0;
- for (size_t i = 0; i < qty; i++) {
- f32 t = *pVect1 - *pVect2;
- pVect1++;
- pVect2++;
- res += t * t;
- }
- return sqrt(res);
- }
- static f32 l2_sqr_int8(const void *pA, const void *pB, const void *pD) {
- i8 *a = (i8 *)pA;
- i8 *b = (i8 *)pB;
- size_t d = *((size_t *)pD);
- f32 res = 0;
- for (size_t i = 0; i < d; i++) {
- f32 t = *a - *b;
- a++;
- b++;
- res += t * t;
- }
- return sqrt(res);
- }
- static f32 distance_l2_sqr_float(const void *a, const void *b, const void *d) {
- #ifdef SQLITE_VEC_ENABLE_NEON
- if ((*(const size_t *)d) > 16) {
- return l2_sqr_float_neon(a, b, d);
- }
- #endif
- #ifdef SQLITE_VEC_ENABLE_AVX
- if (((*(const size_t *)d) % 16 == 0)) {
- return l2_sqr_float_avx(a, b, d);
- }
- #endif
- return l2_sqr_float(a, b, d);
- }
- static f32 distance_l2_sqr_int8(const void *a, const void *b, const void *d) {
- #ifdef SQLITE_VEC_ENABLE_NEON
- if ((*(const size_t *)d) > 7) {
- return l2_sqr_int8_neon(a, b, d);
- }
- #endif
- return l2_sqr_int8(a, b, d);
- }
/*
 * Scalar L1 (Manhattan) distance between two int8 vectors.
 *
 * pA / pB point to the int8 data; pD points to a size_t element count.
 * Returns the sum of absolute element differences as a 32-bit integer.
 */
static i32 l1_int8(const void *pA, const void *pB, const void *pD) {
  const i8 *lhs = (const i8 *)pA;
  const i8 *rhs = (const i8 *)pB;
  const size_t count = *((const size_t *)pD);

  i32 total = 0;
  for (size_t idx = 0; idx < count; idx++) {
    total += abs(lhs[idx] - rhs[idx]);
  }
  return total;
}
- static i32 distance_l1_int8(const void *a, const void *b, const void *d) {
- #ifdef SQLITE_VEC_ENABLE_NEON
- if ((*(const size_t *)d) > 15) {
- return l1_int8_neon(a, b, d);
- }
- #endif
- return l1_int8(a, b, d);
- }
/*
 * Scalar L1 (Manhattan) distance between two float32 vectors.
 *
 * pA / pB point to the float32 data; pD points to a size_t element count.
 * Each element is widened to double before subtracting, and the absolute
 * differences are accumulated in double precision.
 */
static double l1_f32(const void *pA, const void *pB, const void *pD) {
  const f32 *lhs = (const f32 *)pA;
  const f32 *rhs = (const f32 *)pB;
  const size_t count = *((const size_t *)pD);

  double total = 0;
  for (size_t idx = 0; idx < count; idx++) {
    total += fabs((double)lhs[idx] - (double)rhs[idx]);
  }
  return total;
}
/*
 * L1 distance for float32 vectors. In NEON builds the NEON kernel is used
 * for vectors with more than 3 elements; otherwise the scalar implementation
 * is used. `d` points to a size_t element count.
 */
static double distance_l1_f32(const void *a, const void *b, const void *d) {
#ifdef SQLITE_VEC_ENABLE_NEON
  const size_t dims = *(const size_t *)d;
  if (dims > 3) {
    return l1_f32_neon(a, b, d);
  }
#endif
  return l1_f32(a, b, d);
}
- static f32 distance_cosine_float(const void *pVect1v, const void *pVect2v,
- const void *qty_ptr) {
- f32 *pVect1 = (f32 *)pVect1v;
- f32 *pVect2 = (f32 *)pVect2v;
- size_t qty = *((size_t *)qty_ptr);
- f32 dot = 0;
- f32 aMag = 0;
- f32 bMag = 0;
- for (size_t i = 0; i < qty; i++) {
- dot += *pVect1 * *pVect2;
- aMag += *pVect1 * *pVect1;
- bMag += *pVect2 * *pVect2;
- pVect1++;
- pVect2++;
- }
- return 1 - (dot / (sqrt(aMag) * sqrt(bMag)));
- }
- static f32 distance_cosine_int8(const void *pA, const void *pB,
- const void *pD) {
- i8 *a = (i8 *)pA;
- i8 *b = (i8 *)pB;
- size_t d = *((size_t *)pD);
- f32 dot = 0;
- f32 aMag = 0;
- f32 bMag = 0;
- for (size_t i = 0; i < d; i++) {
- dot += *a * *b;
- aMag += *a * *a;
- bMag += *b * *b;
- a++;
- b++;
- }
- return 1 - (dot / (sqrt(aMag) * sqrt(bMag)));
- }
// https://github.com/facebookresearch/faiss/blob/77e2e79cd0a680adc343b9840dd865da724c579e/faiss/utils/hamming_distance/common.h#L34
// hamdist_table[v] is the number of set bits in the byte value v. The table
// is read-only, so it is declared const.
static const u8 hamdist_table[256] = {
    0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4,
    2, 3, 3, 4, 3, 4, 4, 5, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 1, 2, 2, 3, 2, 3, 3, 4,
    2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6,
    4, 5, 5, 6, 5, 6, 6, 7, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5,
    3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6,
    4, 5, 5, 6, 5, 6, 6, 7, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
    4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8};

/*
 * Hamming distance between two byte arrays of length n: the number of bit
 * positions in which a and b differ, computed one byte at a time through
 * hamdist_table. Neither input is modified.
 */
static f32 distance_hamming_u8(u8 *a, u8 *b, size_t n) {
  int differing_bits = 0;
  // size_t index so the loop bound and the counter have the same width.
  for (size_t i = 0; i < n; i++) {
    differing_bits += hamdist_table[a[i] ^ b[i]];
  }
  return (f32)differing_bits;
}
/*
 * MSVC does not provide the GCC/Clang __builtin_popcountl builtin, so supply
 * a replacement. The replacement accepts a full 64-bit word on every target,
 * because it is applied to 64-bit values when computing hamming distances.
 */
#ifdef _MSC_VER
#if !defined(__clang__) && (defined(_M_ARM) || defined(_M_ARM64))
// From
// https://github.com/ngtcp2/ngtcp2/blob/b64f1e77b5e0d880b93d31f474147fae4a1d17cc/lib/ngtcp2_ringbuf.c,
// line 34-43
// Portable bit-clearing loop; the parameter is 64 bits wide so the upper half
// of a 64-bit argument is not discarded.
static unsigned int __builtin_popcountl(unsigned long long x) {
  unsigned int c = 0;
  for (; x; ++c) {
    x &= x - 1;
  }
  return c;
}
#else
#include <intrin.h>
/* __popcnt64 is not available when targeting 32-bit x86 (using it is a build
 * error), so 32-bit builds count the two 32-bit halves with __popcnt. The
 * choice is keyed on _WIN64, which the compiler defines only for 64-bit
 * targets; WIN32 may be defined on 64-bit builds as well. */
#ifdef _WIN64
#define __builtin_popcountl __popcnt64
#else
static unsigned int sqlite_vec_popcnt_u64(unsigned long long x) {
  return __popcnt((unsigned int)x) + __popcnt((unsigned int)(x >> 32));
}
#define __builtin_popcountl sqlite_vec_popcnt_u64
#endif
#endif
#endif
/*
 * Hamming distance between two arrays of n 64-bit words: the number of bit
 * positions in which a and b differ.
 *
 * __builtin_popcountl operates on `unsigned long`, which is only 32 bits wide
 * on ILP32 and LLP64 targets, so passing it a 64-bit word would drop the
 * upper half. GCC/Clang builds therefore use the `long long` builtin; other
 * compilers count the two 32-bit halves separately through whatever
 * __builtin_popcountl replacement the file provides.
 */
static f32 distance_hamming_u64(u64 *a, u64 *b, size_t n) {
  int differing_bits = 0;
  for (size_t i = 0; i < n; i++) {
    const u64 x = a[i] ^ b[i];
#if defined(__GNUC__) || defined(__clang__)
    differing_bits += __builtin_popcountll(x);
#else
    differing_bits += (int)__builtin_popcountl((unsigned int)(x & 0xFFFFFFFFu));
    differing_bits += (int)__builtin_popcountl((unsigned int)(x >> 32));
#endif
  }
  return (f32)differing_bits;
}
- /**
- * @brief Calculate the hamming distance between two bitvectors.
- *
- * @param a - first bitvector, MUST have d dimensions
- * @param b - second bitvector, MUST have d dimensions
- * @param d - pointer to size_t, MUST be divisible by CHAR_BIT
- * @return f32
- */
- static f32 distance_hamming(const void *a, const void *b, const void *d) {
- size_t dimensions = *((size_t *)d);
- if ((dimensions % 64) == 0) {
- return distance_hamming_u64((u64 *)a, (u64 *)b, dimensions / 8 / CHAR_BIT);
- }
- return distance_hamming_u8((u8 *)a, (u8 *)b, dimensions / CHAR_BIT);
- }
// from SQLite source:
// https://github.com/sqlite/sqlite/blob/a509a90958ddb234d1785ed7801880ccb18b497e/src/json.c#L153
// Lookup table indexed by byte value: 1 for the four whitespace characters
// accepted around JSON tokens (tab, line feed, carriage return, space) and 0
// for every other byte.
static const char vecJsonIsSpaceX[256] = {
    ['\t'] = 1,
    ['\n'] = 1,
    ['\r'] = 1,
    [' '] = 1,
};
// The argument is parenthesized so that any expression can be passed; the
// cast to unsigned char keeps the index within 0..255 even where plain char
// is signed.
#define vecJsonIsspace(x) (vecJsonIsSpaceX[(unsigned char)(x)])
/* Signature of a function that releases the storage behind a vector. */
typedef void (*vector_cleanup)(void *p);

/* Cleanup that does nothing; used when the vector storage is not owned. */
void vector_cleanup_noop(void *unused) {
  (void)unused;
}
- #define JSON_SUBTYPE 74
- void vtab_set_error(sqlite3_vtab *pVTab, const char *zFormat, ...) {
- va_list args;
- sqlite3_free(pVTab->zErrMsg);
- va_start(args, zFormat);
- pVTab->zErrMsg = sqlite3_vmprintf(zFormat, args);
- va_end(args);
- }
/* Growable, contiguous array of fixed-size elements. */
struct Array {
  size_t element_size; /* size in bytes of one element */
  size_t length;       /* number of elements currently stored */
  size_t capacity;     /* number of elements the buffer can hold */
  void *z;             /* element storage, released with sqlite3_free() */
};
- /**
- * @brief Initial an array with the given element size and capacity.
- *
- * @param array
- * @param element_size
- * @param init_capacity
- * @return SQLITE_OK on success, error code on failure. Only error is
- * SQLITE_NOMEM
- */
- int array_init(struct Array *array, size_t element_size, size_t init_capacity) {
- int sz = element_size * init_capacity;
- void *z = sqlite3_malloc(sz);
- if (!z) {
- return SQLITE_NOMEM;
- }
- memset(z, 0, sz);
- array->element_size = element_size;
- array->length = 0;
- array->capacity = init_capacity;
- array->z = z;
- return SQLITE_OK;
- }
- int array_append(struct Array *array, const void *element) {
- if (array->length == array->capacity) {
- size_t new_capacity = array->capacity * 2 + 100;
- void *z = sqlite3_realloc64(array->z, array->element_size * new_capacity);
- if (z) {
- array->capacity = new_capacity;
- array->z = z;
- } else {
- return SQLITE_NOMEM;
- }
- }
- memcpy(&((unsigned char *)array->z)[array->length * array->element_size],
- element, array->element_size);
- array->length++;
- return SQLITE_OK;
- }
- void array_cleanup(struct Array *array) {
- if (!array)
- return;
- array->element_size = 0;
- array->length = 0;
- array->capacity = 0;
- sqlite3_free(array->z);
- array->z = NULL;
- }
- char *vector_subtype_name(int subtype) {
- switch (subtype) {
- case SQLITE_VEC_ELEMENT_TYPE_FLOAT32:
- return "float32";
- case SQLITE_VEC_ELEMENT_TYPE_INT8:
- return "int8";
- case SQLITE_VEC_ELEMENT_TYPE_BIT:
- return "bit";
- }
- return "";
- }
- char *type_name(int type) {
- switch (type) {
- case SQLITE_INTEGER:
- return "INTEGER";
- case SQLITE_BLOB:
- return "BLOB";
- case SQLITE_TEXT:
- return "TEXT";
- case SQLITE_FLOAT:
- return "FLOAT";
- case SQLITE_NULL:
- return "NULL";
- }
- return "";
- }
/* Signature of a function that releases the storage of a float32 vector. */
typedef void (*fvec_cleanup)(f32 *vector);

/* Cleanup that does nothing; used when the vector storage is not owned. */
void fvec_cleanup_noop(f32 *unused) {
  (void)unused;
}
- static int fvec_from_value(sqlite3_value *value, f32 **vector,
- size_t *dimensions, fvec_cleanup *cleanup,
- char **pzErr) {
- int value_type = sqlite3_value_type(value);
- if (value_type == SQLITE_BLOB) {
- const void *blob = sqlite3_value_blob(value);
- int bytes = sqlite3_value_bytes(value);
- if (bytes == 0) {
- *pzErr = sqlite3_mprintf("zero-length vectors are not supported.");
- return SQLITE_ERROR;
- }
- if ((bytes % sizeof(f32)) != 0) {
- *pzErr = sqlite3_mprintf("invalid float32 vector BLOB length. Must be "
- "divisible by %d, found %d",
- sizeof(f32), bytes);
- return SQLITE_ERROR;
- }
- *vector = (f32 *)blob;
- *dimensions = bytes / sizeof(f32);
- *cleanup = fvec_cleanup_noop;
- return SQLITE_OK;
- }
- if (value_type == SQLITE_TEXT) {
- const char *source = (const char *)sqlite3_value_text(value);
- int source_len = sqlite3_value_bytes(value);
- if (source_len == 0) {
- *pzErr = sqlite3_mprintf("zero-length vectors are not supported.");
- return SQLITE_ERROR;
- }
- int i = 0;
- struct Array x;
- int rc = array_init(&x, sizeof(f32), ceil(source_len / 2.0));
- if (rc != SQLITE_OK) {
- return rc;
- }
- // advance leading whitespace to first '['
- while (i < source_len) {
- if (vecJsonIsspace(source[i])) {
- i++;
- continue;
- }
- if (source[i] == '[') {
- break;
- }
- *pzErr = sqlite3_mprintf(
- "JSON array parsing error: Input does not start with '['");
- array_cleanup(&x);
- return SQLITE_ERROR;
- }
- if (source[i] != '[') {
- *pzErr = sqlite3_mprintf(
- "JSON array parsing error: Input does not start with '['");
- array_cleanup(&x);
- return SQLITE_ERROR;
- }
- int offset = i + 1;
- while (offset < source_len) {
- char *ptr = (char *)&source[offset];
- char *endptr;
- errno = 0;
- double result = strtod(ptr, &endptr);
- if ((errno != 0 && result == 0) // some interval error?
- || (errno == ERANGE &&
- (result == HUGE_VAL || result == -HUGE_VAL)) // too big / smalls
- ) {
- sqlite3_free(x.z);
- *pzErr = sqlite3_mprintf("JSON parsing error");
- return SQLITE_ERROR;
- }
- if (endptr == ptr) {
- if (*ptr != ']') {
- sqlite3_free(x.z);
- *pzErr = sqlite3_mprintf("JSON parsing error");
- return SQLITE_ERROR;
- }
- goto done;
- }
- f32 res = (f32)result;
- array_append(&x, (const void *)&res);
- offset += (endptr - ptr);
- while (offset < source_len) {
- if (vecJsonIsspace(source[offset])) {
- offset++;
- continue;
- }
- if (source[offset] == ',') {
- offset++;
- continue;
- }
- if (source[offset] == ']')
- goto done;
- break;
- }
- }
- done:
- if (x.length > 0) {
- *vector = (f32 *)x.z;
- *dimensions = x.length;
- *cleanup = (fvec_cleanup)sqlite3_free;
- return SQLITE_OK;
- }
- sqlite3_free(x.z);
- *pzErr = sqlite3_mprintf("zero-length vectors are not supported.");
- return SQLITE_ERROR;
- }
- *pzErr = sqlite3_mprintf(
- "Input must have type BLOB (compact format) or TEXT (JSON), found %s",
- type_name(value_type));
- return SQLITE_ERROR;
- }
- static int bitvec_from_value(sqlite3_value *value, u8 **vector,
- size_t *dimensions, vector_cleanup *cleanup,
- char **pzErr) {
- int value_type = sqlite3_value_type(value);
- if (value_type == SQLITE_BLOB) {
- const void *blob = sqlite3_value_blob(value);
- int bytes = sqlite3_value_bytes(value);
- if (bytes == 0) {
- *pzErr = sqlite3_mprintf("zero-length vectors are not supported.");
- return SQLITE_ERROR;
- }
- *vector = (u8 *)blob;
- *dimensions = bytes * CHAR_BIT;
- *cleanup = vector_cleanup_noop;
- return SQLITE_OK;
- }
- *pzErr = sqlite3_mprintf("Unknown type for bitvector.");
- return SQLITE_ERROR;
- }
- static int int8_vec_from_value(sqlite3_value *value, i8 **vector,
- size_t *dimensions, vector_cleanup *cleanup,
- char **pzErr) {
- int value_type = sqlite3_value_type(value);
- if (value_type == SQLITE_BLOB) {
- const void *blob = sqlite3_value_blob(value);
- int bytes = sqlite3_value_bytes(value);
- if (bytes == 0) {
- *pzErr = sqlite3_mprintf("zero-length vectors are not supported.");
- return SQLITE_ERROR;
- }
- *vector = (i8 *)blob;
- *dimensions = bytes;
- *cleanup = vector_cleanup_noop;
- return SQLITE_OK;
- }
- if (value_type == SQLITE_TEXT) {
- const char *source = (const char *)sqlite3_value_text(value);
- int source_len = sqlite3_value_bytes(value);
- int i = 0;
- if (source_len == 0) {
- *pzErr = sqlite3_mprintf("zero-length vectors are not supported.");
- return SQLITE_ERROR;
- }
- struct Array x;
- int rc = array_init(&x, sizeof(i8), ceil(source_len / 2.0));
- if (rc != SQLITE_OK) {
- return rc;
- }
- // advance leading whitespace to first '['
- while (i < source_len) {
- if (vecJsonIsspace(source[i])) {
- i++;
- continue;
- }
- if (source[i] == '[') {
- break;
- }
- *pzErr = sqlite3_mprintf(
- "JSON array parsing error: Input does not start with '['");
- array_cleanup(&x);
- return SQLITE_ERROR;
- }
- if (source[i] != '[') {
- *pzErr = sqlite3_mprintf(
- "JSON array parsing error: Input does not start with '['");
- array_cleanup(&x);
- return SQLITE_ERROR;
- }
- int offset = i + 1;
- while (offset < source_len) {
- char *ptr = (char *)&source[offset];
- char *endptr;
- errno = 0;
- long result = strtol(ptr, &endptr, 10);
- if ((errno != 0 && result == 0) ||
- (errno == ERANGE && (result == LONG_MAX || result == LONG_MIN))) {
- sqlite3_free(x.z);
- *pzErr = sqlite3_mprintf("JSON parsing error");
- return SQLITE_ERROR;
- }
- if (endptr == ptr) {
- if (*ptr != ']') {
- sqlite3_free(x.z);
- *pzErr = sqlite3_mprintf("JSON parsing error");
- return SQLITE_ERROR;
- }
- goto done;
- }
- if (result < INT8_MIN || result > INT8_MAX) {
- sqlite3_free(x.z);
- *pzErr =
- sqlite3_mprintf("JSON parsing error: value out of range for int8");
- return SQLITE_ERROR;
- }
- i8 res = (i8)result;
- array_append(&x, (const void *)&res);
- offset += (endptr - ptr);
- while (offset < source_len) {
- if (vecJsonIsspace(source[offset])) {
- offset++;
- continue;
- }
- if (source[offset] == ',') {
- offset++;
- continue;
- }
- if (source[offset] == ']')
- goto done;
- break;
- }
- }
- done:
- if (x.length > 0) {
- *vector = (i8 *)x.z;
- *dimensions = x.length;
- *cleanup = (vector_cleanup)sqlite3_free;
- return SQLITE_OK;
- }
- sqlite3_free(x.z);
- *pzErr = sqlite3_mprintf("zero-length vectors are not supported.");
- return SQLITE_ERROR;
- }
- *pzErr = sqlite3_mprintf("Unknown type for int8 vector.");
- return SQLITE_ERROR;
- }
- /**
- * @brief Extract a vector from a sqlite3_value. Can be a float32, int8, or bit
- * vector.
- *
- * @param value: the sqlite3_value to read from.
- * @param vector: Output pointer to vector data.
- * @param dimensions: Output number of dimensions
- * @param dimensions: Output vector element type
- * @param cleanup
- * @param pzErrorMessage
- * @return int SQLITE_OK on success, error code otherwise
- */
- int vector_from_value(sqlite3_value *value, void **vector, size_t *dimensions,
- enum VectorElementType *element_type,
- vector_cleanup *cleanup, char **pzErrorMessage) {
- int subtype = sqlite3_value_subtype(value);
- if (!subtype || (subtype == SQLITE_VEC_ELEMENT_TYPE_FLOAT32) ||
- (subtype == JSON_SUBTYPE)) {
- int rc = fvec_from_value(value, (f32 **)vector, dimensions,
- (fvec_cleanup *)cleanup, pzErrorMessage);
- if (rc == SQLITE_OK) {
- *element_type = SQLITE_VEC_ELEMENT_TYPE_FLOAT32;
- }
- return rc;
- }
- if (subtype == SQLITE_VEC_ELEMENT_TYPE_BIT) {
- int rc = bitvec_from_value(value, (u8 **)vector, dimensions, cleanup,
- pzErrorMessage);
- if (rc == SQLITE_OK) {
- *element_type = SQLITE_VEC_ELEMENT_TYPE_BIT;
- }
- return rc;
- }
- if (subtype == SQLITE_VEC_ELEMENT_TYPE_INT8) {
- int rc = int8_vec_from_value(value, (i8 **)vector, dimensions, cleanup,
- pzErrorMessage);
- if (rc == SQLITE_OK) {
- *element_type = SQLITE_VEC_ELEMENT_TYPE_INT8;
- }
- return rc;
- }
- *pzErrorMessage = sqlite3_mprintf("Unknown subtype: %d", subtype);
- return SQLITE_ERROR;
- }
- int ensure_vector_match(sqlite3_value *aValue, sqlite3_value *bValue, void **a,
- void **b, enum VectorElementType *element_type,
- size_t *dimensions, vector_cleanup *outACleanup,
- vector_cleanup *outBCleanup, char **outError) {
- int rc;
- enum VectorElementType aType, bType;
- size_t aDims, bDims;
- char *error = NULL;
- vector_cleanup aCleanup, bCleanup;
- rc = vector_from_value(aValue, a, &aDims, &aType, &aCleanup, &error);
- if (rc != SQLITE_OK) {
- *outError = sqlite3_mprintf("Error reading 1st vector: %s", error);
- sqlite3_free(error);
- return SQLITE_ERROR;
- }
- rc = vector_from_value(bValue, b, &bDims, &bType, &bCleanup, &error);
- if (rc != SQLITE_OK) {
- *outError = sqlite3_mprintf("Error reading 2nd vector: %s", error);
- sqlite3_free(error);
- aCleanup(a);
- return SQLITE_ERROR;
- }
- if (aType != bType) {
- *outError =
- sqlite3_mprintf("Vector type mistmatch. First vector has type %s, "
- "while the second has type %s.",
- vector_subtype_name(aType), vector_subtype_name(bType));
- aCleanup(*a);
- bCleanup(*b);
- return SQLITE_ERROR;
- }
- if (aDims != bDims) {
- *outError = sqlite3_mprintf(
- "Vector dimension mistmatch. First vector has %ld dimensions, "
- "while the second has %ld dimensions.",
- aDims, bDims);
- aCleanup(*a);
- bCleanup(*b);
- return SQLITE_ERROR;
- }
- *element_type = aType;
- *dimensions = aDims;
- *outACleanup = aCleanup;
- *outBCleanup = bCleanup;
- return SQLITE_OK;
- }
- int _cmp(const void *a, const void *b) { return (*(i64 *)a - *(i64 *)b); }
/**
 * Payload that vec_npy_file() hands to sqlite3_result_pointer(): a file path
 * together with its length.
 */
struct VecNpyFile {
  // Path text taken from the SQL argument of vec_npy_file()
  char *path;
  // Length of `path` in bytes
  size_t pathLength;
};

// Type tag passed to sqlite3_result_pointer() alongside a struct VecNpyFile
#define SQLITE_VEC_NPY_FILE_NAME "vec0-npy-file"
- #ifndef SQLITE_VEC_OMIT_FS
- static void vec_npy_file(sqlite3_context *context, int argc,
- sqlite3_value **argv) {
- assert(argc == 1);
- char *path = (char *)sqlite3_value_text(argv[0]);
- size_t pathLength = sqlite3_value_bytes(argv[0]);
- struct VecNpyFile *f;
- f = sqlite3_malloc(sizeof(*f));
- if (!f) {
- sqlite3_result_error_nomem(context);
- return;
- }
- memset(f, 0, sizeof(*f));
- f->path = path;
- f->pathLength = pathLength;
- sqlite3_result_pointer(context, f, SQLITE_VEC_NPY_FILE_NAME, sqlite3_free);
- }
- #endif
- #pragma region scalar functions
- static void vec_f32(sqlite3_context *context, int argc, sqlite3_value **argv) {
- assert(argc == 1);
- int rc;
- f32 *vector = NULL;
- size_t dimensions;
- fvec_cleanup cleanup;
- char *errmsg;
- rc = fvec_from_value(argv[0], &vector, &dimensions, &cleanup, &errmsg);
- if (rc != SQLITE_OK) {
- sqlite3_result_error(context, errmsg, -1);
- sqlite3_free(errmsg);
- return;
- }
- sqlite3_result_blob(context, vector, dimensions * sizeof(f32),
- (void (*)(void *))cleanup);
- sqlite3_result_subtype(context, SQLITE_VEC_ELEMENT_TYPE_FLOAT32);
- }
- static void vec_bit(sqlite3_context *context, int argc, sqlite3_value **argv) {
- assert(argc == 1);
- int rc;
- u8 *vector;
- size_t dimensions;
- vector_cleanup cleanup;
- char *errmsg;
- rc = bitvec_from_value(argv[0], &vector, &dimensions, &cleanup, &errmsg);
- if (rc != SQLITE_OK) {
- sqlite3_result_error(context, errmsg, -1);
- sqlite3_free(errmsg);
- return;
- }
- sqlite3_result_blob(context, vector, dimensions / CHAR_BIT, SQLITE_TRANSIENT);
- sqlite3_result_subtype(context, SQLITE_VEC_ELEMENT_TYPE_BIT);
- cleanup(vector);
- }
- static void vec_int8(sqlite3_context *context, int argc, sqlite3_value **argv) {
- assert(argc == 1);
- int rc;
- i8 *vector;
- size_t dimensions;
- vector_cleanup cleanup;
- char *errmsg;
- rc = int8_vec_from_value(argv[0], &vector, &dimensions, &cleanup, &errmsg);
- if (rc != SQLITE_OK) {
- sqlite3_result_error(context, errmsg, -1);
- sqlite3_free(errmsg);
- return;
- }
- sqlite3_result_blob(context, vector, dimensions, SQLITE_TRANSIENT);
- sqlite3_result_subtype(context, SQLITE_VEC_ELEMENT_TYPE_INT8);
- cleanup(vector);
- }
- static void vec_length(sqlite3_context *context, int argc,
- sqlite3_value **argv) {
- assert(argc == 1);
- int rc;
- void *vector;
- size_t dimensions;
- vector_cleanup cleanup;
- char *errmsg;
- enum VectorElementType elementType;
- rc = vector_from_value(argv[0], &vector, &dimensions, &elementType, &cleanup,
- &errmsg);
- if (rc != SQLITE_OK) {
- sqlite3_result_error(context, errmsg, -1);
- sqlite3_free(errmsg);
- return;
- }
- sqlite3_result_int64(context, dimensions);
- cleanup(vector);
- }
- static void vec_distance_cosine(sqlite3_context *context, int argc,
- sqlite3_value **argv) {
- assert(argc == 2);
- int rc;
- void *a = NULL, *b = NULL;
- size_t dimensions;
- vector_cleanup aCleanup, bCleanup;
- char *error;
- enum VectorElementType elementType;
- rc = ensure_vector_match(argv[0], argv[1], &a, &b, &elementType, &dimensions,
- &aCleanup, &bCleanup, &error);
- if (rc != SQLITE_OK) {
- sqlite3_result_error(context, error, -1);
- sqlite3_free(error);
- return;
- }
- switch (elementType) {
- case SQLITE_VEC_ELEMENT_TYPE_BIT: {
- sqlite3_result_error(
- context, "Cannot calculate cosine distance between two bitvectors.",
- -1);
- goto finish;
- }
- case SQLITE_VEC_ELEMENT_TYPE_FLOAT32: {
- f32 result = distance_cosine_float(a, b, &dimensions);
- sqlite3_result_double(context, result);
- goto finish;
- }
- case SQLITE_VEC_ELEMENT_TYPE_INT8: {
- f32 result = distance_cosine_int8(a, b, &dimensions);
- sqlite3_result_double(context, result);
- goto finish;
- }
- }
- finish:
- aCleanup(a);
- bCleanup(b);
- return;
- }
- static void vec_distance_l2(sqlite3_context *context, int argc,
- sqlite3_value **argv) {
- assert(argc == 2);
- int rc;
- void *a = NULL, *b = NULL;
- size_t dimensions;
- vector_cleanup aCleanup, bCleanup;
- char *error;
- enum VectorElementType elementType;
- rc = ensure_vector_match(argv[0], argv[1], &a, &b, &elementType, &dimensions,
- &aCleanup, &bCleanup, &error);
- if (rc != SQLITE_OK) {
- sqlite3_result_error(context, error, -1);
- sqlite3_free(error);
- return;
- }
- switch (elementType) {
- case SQLITE_VEC_ELEMENT_TYPE_BIT: {
- sqlite3_result_error(
- context, "Cannot calculate L2 distance between two bitvectors.", -1);
- goto finish;
- }
- case SQLITE_VEC_ELEMENT_TYPE_FLOAT32: {
- f32 result = distance_l2_sqr_float(a, b, &dimensions);
- sqlite3_result_double(context, result);
- goto finish;
- }
- case SQLITE_VEC_ELEMENT_TYPE_INT8: {
- f32 result = distance_l2_sqr_int8(a, b, &dimensions);
- sqlite3_result_double(context, result);
- goto finish;
- }
- }
- finish:
- aCleanup(a);
- bCleanup(b);
- return;
- }
- static void vec_distance_l1(sqlite3_context *context, int argc,
- sqlite3_value **argv) {
- assert(argc == 2);
- int rc;
- void *a, *b;
- size_t dimensions;
- vector_cleanup aCleanup, bCleanup;
- char *error;
- enum VectorElementType elementType;
- rc = ensure_vector_match(argv[0], argv[1], &a, &b, &elementType, &dimensions,
- &aCleanup, &bCleanup, &error);
- if (rc != SQLITE_OK) {
- sqlite3_result_error(context, error, -1);
- sqlite3_free(error);
- return;
- }
- switch (elementType) {
- case SQLITE_VEC_ELEMENT_TYPE_BIT: {
- sqlite3_result_error(
- context, "Cannot calculate L1 distance between two bitvectors.", -1);
- goto finish;
- }
- case SQLITE_VEC_ELEMENT_TYPE_FLOAT32: {
- double result = distance_l1_f32(a, b, &dimensions);
- sqlite3_result_double(context, result);
- goto finish;
- }
- case SQLITE_VEC_ELEMENT_TYPE_INT8: {
- i64 result = distance_l1_int8(a, b, &dimensions);
- sqlite3_result_int(context, result);
- goto finish;
- }
- }
- finish:
- aCleanup(a);
- bCleanup(b);
- return;
- }
- static void vec_distance_hamming(sqlite3_context *context, int argc,
- sqlite3_value **argv) {
- assert(argc == 2);
- int rc;
- void *a = NULL, *b = NULL;
- size_t dimensions;
- vector_cleanup aCleanup, bCleanup;
- char *error;
- enum VectorElementType elementType;
- rc = ensure_vector_match(argv[0], argv[1], &a, &b, &elementType, &dimensions,
- &aCleanup, &bCleanup, &error);
- if (rc != SQLITE_OK) {
- sqlite3_result_error(context, error, -1);
- sqlite3_free(error);
- return;
- }
- switch (elementType) {
- case SQLITE_VEC_ELEMENT_TYPE_BIT: {
- sqlite3_result_double(context, distance_hamming(a, b, &dimensions));
- goto finish;
- }
- case SQLITE_VEC_ELEMENT_TYPE_FLOAT32: {
- sqlite3_result_error(
- context,
- "Cannot calculate hamming distance between two float32 vectors.", -1);
- goto finish;
- }
- case SQLITE_VEC_ELEMENT_TYPE_INT8: {
- sqlite3_result_error(
- context, "Cannot calculate hamming distance between two int8 vectors.",
- -1);
- goto finish;
- }
- }
- finish:
- aCleanup(a);
- bCleanup(b);
- return;
- }
- char *vec_type_name(enum VectorElementType elementType) {
- switch (elementType) {
- case SQLITE_VEC_ELEMENT_TYPE_FLOAT32:
- return "float32";
- case SQLITE_VEC_ELEMENT_TYPE_INT8:
- return "int8";
- case SQLITE_VEC_ELEMENT_TYPE_BIT:
- return "bit";
- }
- return "";
- }
- static void vec_type(sqlite3_context *context, int argc, sqlite3_value **argv) {
- assert(argc == 1);
- void *vector;
- size_t dimensions;
- vector_cleanup cleanup;
- char *pzError;
- enum VectorElementType elementType;
- int rc = vector_from_value(argv[0], &vector, &dimensions, &elementType,
- &cleanup, &pzError);
- if (rc != SQLITE_OK) {
- sqlite3_result_error(context, pzError, -1);
- sqlite3_free(pzError);
- return;
- }
- sqlite3_result_text(context, vec_type_name(elementType), -1, SQLITE_STATIC);
- cleanup(vector);
- }
- static void vec_quantize_binary(sqlite3_context *context, int argc,
- sqlite3_value **argv) {
- assert(argc == 1);
- void *vector;
- size_t dimensions;
- vector_cleanup vectorCleanup;
- char *pzError;
- enum VectorElementType elementType;
- int rc = vector_from_value(argv[0], &vector, &dimensions, &elementType,
- &vectorCleanup, &pzError);
- if (rc != SQLITE_OK) {
- sqlite3_result_error(context, pzError, -1);
- sqlite3_free(pzError);
- return;
- }
- if (dimensions <= 0) {
- sqlite3_result_error(context, "Zero length vectors are not supported.", -1);
- goto cleanup;
- return;
- }
- if ((dimensions % CHAR_BIT) != 0) {
- sqlite3_result_error(
- context,
- "Binary quantization requires vectors with a length divisible by 8",
- -1);
- goto cleanup;
- return;
- }
- int sz = dimensions / CHAR_BIT;
- u8 *out = sqlite3_malloc(sz);
- if (!out) {
- sqlite3_result_error_code(context, SQLITE_NOMEM);
- goto cleanup;
- return;
- }
- memset(out, 0, sz);
- switch (elementType) {
- case SQLITE_VEC_ELEMENT_TYPE_FLOAT32: {
- for (size_t i = 0; i < dimensions; i++) {
- int res = ((f32 *)vector)[i] > 0.0;
- out[i / 8] |= (res << (i % 8));
- }
- break;
- }
- case SQLITE_VEC_ELEMENT_TYPE_INT8: {
- for (size_t i = 0; i < dimensions; i++) {
- int res = ((i8 *)vector)[i] > 0;
- out[i / 8] |= (res << (i % 8));
- }
- break;
- }
- case SQLITE_VEC_ELEMENT_TYPE_BIT: {
- sqlite3_result_error(context,
- "Can only binary quantize float or int8 vectors", -1);
- sqlite3_free(out);
- return;
- }
- }
- sqlite3_result_blob(context, out, sz, sqlite3_free);
- sqlite3_result_subtype(context, SQLITE_VEC_ELEMENT_TYPE_BIT);
- cleanup:
- vectorCleanup(vector);
- }
- static void vec_quantize_int8(sqlite3_context *context, int argc,
- sqlite3_value **argv) {
- assert(argc == 2);
- f32 *srcVector;
- size_t dimensions;
- fvec_cleanup srcCleanup;
- char *err;
- i8 *out = NULL;
- int rc = fvec_from_value(argv[0], &srcVector, &dimensions, &srcCleanup, &err);
- if (rc != SQLITE_OK) {
- sqlite3_result_error(context, err, -1);
- sqlite3_free(err);
- return;
- }
- int sz = dimensions * sizeof(i8);
- out = sqlite3_malloc(sz);
- if (!out) {
- sqlite3_result_error_nomem(context);
- goto cleanup;
- }
- memset(out, 0, sz);
- if ((sqlite3_value_type(argv[1]) != SQLITE_TEXT) ||
- (sqlite3_value_bytes(argv[1]) != strlen("unit")) ||
- (sqlite3_stricmp((const char *)sqlite3_value_text(argv[1]), "unit") !=
- 0)) {
- sqlite3_result_error(
- context, "2nd argument to vec_quantize_int8() must be 'unit'.", -1);
- sqlite3_free(out);
- goto cleanup;
- }
- f32 step = (1.0 - (-1.0)) / 255;
- for (size_t i = 0; i < dimensions; i++) {
- out[i] = ((srcVector[i] - (-1.0)) / step) - 128;
- }
- sqlite3_result_blob(context, out, dimensions * sizeof(i8), sqlite3_free);
- sqlite3_result_subtype(context, SQLITE_VEC_ELEMENT_TYPE_INT8);
- cleanup:
- srcCleanup(srcVector);
- }
- static void vec_add(sqlite3_context *context, int argc, sqlite3_value **argv) {
- assert(argc == 2);
- int rc;
- void *a = NULL, *b = NULL;
- size_t dimensions;
- vector_cleanup aCleanup, bCleanup;
- char *error;
- enum VectorElementType elementType;
- rc = ensure_vector_match(argv[0], argv[1], &a, &b, &elementType, &dimensions,
- &aCleanup, &bCleanup, &error);
- if (rc != SQLITE_OK) {
- sqlite3_result_error(context, error, -1);
- sqlite3_free(error);
- return;
- }
- switch (elementType) {
- case SQLITE_VEC_ELEMENT_TYPE_BIT: {
- sqlite3_result_error(context, "Cannot add two bitvectors together.", -1);
- goto finish;
- }
- case SQLITE_VEC_ELEMENT_TYPE_FLOAT32: {
- size_t outSize = dimensions * sizeof(f32);
- f32 *out = sqlite3_malloc(outSize);
- if (!out) {
- sqlite3_result_error_nomem(context);
- goto finish;
- }
- memset(out, 0, outSize);
- for (size_t i = 0; i < dimensions; i++) {
- out[i] = ((f32 *)a)[i] + ((f32 *)b)[i];
- }
- sqlite3_result_blob(context, out, outSize, sqlite3_free);
- sqlite3_result_subtype(context, SQLITE_VEC_ELEMENT_TYPE_FLOAT32);
- goto finish;
- }
- case SQLITE_VEC_ELEMENT_TYPE_INT8: {
- size_t outSize = dimensions * sizeof(i8);
- i8 *out = sqlite3_malloc(outSize);
- if (!out) {
- sqlite3_result_error_nomem(context);
- goto finish;
- }
- memset(out, 0, outSize);
- for (size_t i = 0; i < dimensions; i++) {
- out[i] = ((i8 *)a)[i] + ((i8 *)b)[i];
- }
- sqlite3_result_blob(context, out, outSize, sqlite3_free);
- sqlite3_result_subtype(context, SQLITE_VEC_ELEMENT_TYPE_INT8);
- goto finish;
- }
- }
- finish:
- aCleanup(a);
- bCleanup(b);
- return;
- }
- static void vec_sub(sqlite3_context *context, int argc, sqlite3_value **argv) {
- assert(argc == 2);
- int rc;
- void *a = NULL, *b = NULL;
- size_t dimensions;
- vector_cleanup aCleanup, bCleanup;
- char *error;
- enum VectorElementType elementType;
- rc = ensure_vector_match(argv[0], argv[1], &a, &b, &elementType, &dimensions,
- &aCleanup, &bCleanup, &error);
- if (rc != SQLITE_OK) {
- sqlite3_result_error(context, error, -1);
- sqlite3_free(error);
- return;
- }
- switch (elementType) {
- case SQLITE_VEC_ELEMENT_TYPE_BIT: {
- sqlite3_result_error(context, "Cannot subtract two bitvectors together.",
- -1);
- goto finish;
- }
- case SQLITE_VEC_ELEMENT_TYPE_FLOAT32: {
- size_t outSize = dimensions * sizeof(f32);
- f32 *out = sqlite3_malloc(outSize);
- if (!out) {
- sqlite3_result_error_nomem(context);
- goto finish;
- }
- memset(out, 0, outSize);
- for (size_t i = 0; i < dimensions; i++) {
- out[i] = ((f32 *)a)[i] - ((f32 *)b)[i];
- }
- sqlite3_result_blob(context, out, outSize, sqlite3_free);
- sqlite3_result_subtype(context, SQLITE_VEC_ELEMENT_TYPE_FLOAT32);
- goto finish;
- }
- case SQLITE_VEC_ELEMENT_TYPE_INT8: {
- size_t outSize = dimensions * sizeof(i8);
- i8 *out = sqlite3_malloc(outSize);
- if (!out) {
- sqlite3_result_error_nomem(context);
- goto finish;
- }
- memset(out, 0, outSize);
- for (size_t i = 0; i < dimensions; i++) {
- out[i] = ((i8 *)a)[i] - ((i8 *)b)[i];
- }
- sqlite3_result_blob(context, out, outSize, sqlite3_free);
- sqlite3_result_subtype(context, SQLITE_VEC_ELEMENT_TYPE_INT8);
- goto finish;
- }
- }
- finish:
- aCleanup(a);
- bCleanup(b);
- return;
- }
- static void vec_slice(sqlite3_context *context, int argc,
- sqlite3_value **argv) {
- assert(argc == 3);
- void *vector;
- size_t dimensions;
- vector_cleanup cleanup;
- char *err;
- enum VectorElementType elementType;
- int rc = vector_from_value(argv[0], &vector, &dimensions, &elementType,
- &cleanup, &err);
- if (rc != SQLITE_OK) {
- sqlite3_result_error(context, err, -1);
- sqlite3_free(err);
- return;
- }
- int start = sqlite3_value_int(argv[1]);
- int end = sqlite3_value_int(argv[2]);
- if (start < 0) {
- sqlite3_result_error(context,
- "slice 'start' index must be a postive number.", -1);
- goto done;
- }
- if (end < 0) {
- sqlite3_result_error(context, "slice 'end' index must be a postive number.",
- -1);
- goto done;
- }
- if (((size_t)start) > dimensions) {
- sqlite3_result_error(
- context, "slice 'start' index is greater than the number of dimensions",
- -1);
- goto done;
- }
- if (((size_t)end) > dimensions) {
- sqlite3_result_error(
- context, "slice 'end' index is greater than the number of dimensions",
- -1);
- goto done;
- }
- if (start > end) {
- sqlite3_result_error(context,
- "slice 'start' index is greater than 'end' index", -1);
- goto done;
- }
- if (start == end) {
- sqlite3_result_error(context,
- "slice 'start' index is equal to the 'end' index, "
- "vectors must have non-zero length",
- -1);
- goto done;
- }
- size_t n = end - start;
- switch (elementType) {
- case SQLITE_VEC_ELEMENT_TYPE_FLOAT32: {
- int outSize = n * sizeof(f32);
- f32 *out = sqlite3_malloc(outSize);
- if (!out) {
- sqlite3_result_error_nomem(context);
- goto done;
- }
- memset(out, 0, outSize);
- for (size_t i = 0; i < n; i++) {
- out[i] = ((f32 *)vector)[start + i];
- }
- sqlite3_result_blob(context, out, outSize, sqlite3_free);
- sqlite3_result_subtype(context, SQLITE_VEC_ELEMENT_TYPE_FLOAT32);
- goto done;
- }
- case SQLITE_VEC_ELEMENT_TYPE_INT8: {
- int outSize = n * sizeof(i8);
- i8 *out = sqlite3_malloc(outSize);
- if (!out) {
- sqlite3_result_error_nomem(context);
- return;
- }
- memset(out, 0, outSize);
- for (size_t i = 0; i < n; i++) {
- out[i] = ((i8 *)vector)[start + i];
- }
- sqlite3_result_blob(context, out, outSize, sqlite3_free);
- sqlite3_result_subtype(context, SQLITE_VEC_ELEMENT_TYPE_INT8);
- goto done;
- }
- case SQLITE_VEC_ELEMENT_TYPE_BIT: {
- if ((start % CHAR_BIT) != 0) {
- sqlite3_result_error(context, "start index must be divisible by 8.", -1);
- goto done;
- }
- if ((end % CHAR_BIT) != 0) {
- sqlite3_result_error(context, "end index must be divisible by 8.", -1);
- goto done;
- }
- int outSize = n / CHAR_BIT;
- u8 *out = sqlite3_malloc(outSize);
- if (!out) {
- sqlite3_result_error_nomem(context);
- return;
- }
- memset(out, 0, outSize);
- for (size_t i = 0; i < n / CHAR_BIT; i++) {
- out[i] = ((u8 *)vector)[(start / CHAR_BIT) + i];
- }
- sqlite3_result_blob(context, out, outSize, sqlite3_free);
- sqlite3_result_subtype(context, SQLITE_VEC_ELEMENT_TYPE_BIT);
- goto done;
- }
- }
- done:
- cleanup(vector);
- }
- static void vec_to_json(sqlite3_context *context, int argc,
- sqlite3_value **argv) {
- assert(argc == 1);
- void *vector;
- size_t dimensions;
- vector_cleanup cleanup;
- char *err;
- enum VectorElementType elementType;
- int rc = vector_from_value(argv[0], &vector, &dimensions, &elementType,
- &cleanup, &err);
- if (rc != SQLITE_OK) {
- sqlite3_result_error(context, err, -1);
- sqlite3_free(err);
- return;
- }
- sqlite3_str *str = sqlite3_str_new(sqlite3_context_db_handle(context));
- sqlite3_str_appendall(str, "[");
- for (size_t i = 0; i < dimensions; i++) {
- if (i != 0) {
- sqlite3_str_appendall(str, ",");
- }
- if (elementType == SQLITE_VEC_ELEMENT_TYPE_FLOAT32) {
- f32 value = ((f32 *)vector)[i];
- if (isnan(value)) {
- sqlite3_str_appendall(str, "null");
- } else {
- sqlite3_str_appendf(str, "%f", value);
- }
- } else if (elementType == SQLITE_VEC_ELEMENT_TYPE_INT8) {
- sqlite3_str_appendf(str, "%d", ((i8 *)vector)[i]);
- } else if (elementType == SQLITE_VEC_ELEMENT_TYPE_BIT) {
- u8 b = (((u8 *)vector)[i / 8] >> (i % CHAR_BIT)) & 1;
- sqlite3_str_appendf(str, "%d", b);
- }
- }
- sqlite3_str_appendall(str, "]");
- int len = sqlite3_str_length(str);
- char *s = sqlite3_str_finish(str);
- if (s) {
- sqlite3_result_text(context, s, len, sqlite3_free);
- sqlite3_result_subtype(context, JSON_SUBTYPE);
- } else {
- sqlite3_result_error_nomem(context);
- }
- cleanup(vector);
- }
- static void vec_normalize(sqlite3_context *context, int argc,
- sqlite3_value **argv) {
- assert(argc == 1);
- void *vector;
- size_t dimensions;
- vector_cleanup cleanup;
- char *err;
- enum VectorElementType elementType;
- int rc = vector_from_value(argv[0], &vector, &dimensions, &elementType,
- &cleanup, &err);
- if (rc != SQLITE_OK) {
- sqlite3_result_error(context, err, -1);
- sqlite3_free(err);
- return;
- }
- if (elementType != SQLITE_VEC_ELEMENT_TYPE_FLOAT32) {
- sqlite3_result_error(
- context, "only float32 vectors are supported when normalizing", -1);
- cleanup(vector);
- return;
- }
- int outSize = dimensions * sizeof(f32);
- f32 *out = sqlite3_malloc(outSize);
- if (!out) {
- cleanup(vector);
- sqlite3_result_error_code(context, SQLITE_NOMEM);
- return;
- }
- memset(out, 0, outSize);
- f32 *v = (f32 *)vector;
- f32 norm = 0;
- for (size_t i = 0; i < dimensions; i++) {
- norm += v[i] * v[i];
- }
- norm = sqrt(norm);
- for (size_t i = 0; i < dimensions; i++) {
- out[i] = v[i] / norm;
- }
- sqlite3_result_blob(context, out, dimensions * sizeof(f32), sqlite3_free);
- sqlite3_result_subtype(context, SQLITE_VEC_ELEMENT_TYPE_FLOAT32);
- cleanup(vector);
- }
- static void _static_text_func(sqlite3_context *context, int argc,
- sqlite3_value **argv) {
- UNUSED_PARAMETER(argc);
- UNUSED_PARAMETER(argv);
- sqlite3_result_text(context, sqlite3_user_data(context), -1, SQLITE_STATIC);
- }
- #pragma endregion
// Kinds of tokens produced by vec0_token_next().
enum Vec0TokenType {
  // A letter followed by any number of letters, digits or '_'
  TOKEN_TYPE_IDENTIFIER,
  // A run of decimal digits
  TOKEN_TYPE_DIGIT,
  // '['
  TOKEN_TYPE_LBRACKET,
  // ']'
  TOKEN_TYPE_RBRACKET,
  // '+'
  TOKEN_TYPE_PLUS,
  // '='
  TOKEN_TYPE_EQ,
};
- struct Vec0Token {
- enum Vec0TokenType token_type;
- char *start;
- char *end;
- };
/**
 * @brief Test for an ASCII letter.
 *
 * @param x: character to test
 * @return int: 1 if x is in 'a'..'z' or 'A'..'Z', 0 otherwise
 */
int is_alpha(char x) {
  if (x >= 'a' && x <= 'z') {
    return 1;
  }
  if (x >= 'A' && x <= 'Z') {
    return 1;
  }
  return 0;
}
/**
 * @brief Test for an ASCII decimal digit.
 *
 * @param x: character to test
 * @return int: 1 if x is in '0'..'9', 0 otherwise
 */
int is_digit(char x) {
  const int atLeastZero = x >= '0';
  const int atMostNine = x <= '9';
  return atLeastZero && atMostNine;
}
/**
 * @brief Test for a whitespace character.
 *
 * @param x: character to test
 * @return int: 1 if x is a space, tab, newline or carriage return, 0 otherwise
 */
int is_whitespace(char x) {
  switch (x) {
  case ' ':
  case '\t':
  case '\n':
  case '\r':
    return 1;
  default:
    return 0;
  }
}
- #define VEC0_TOKEN_RESULT_EOF 1
- #define VEC0_TOKEN_RESULT_SOME 2
- #define VEC0_TOKEN_RESULT_ERROR 3
- int vec0_token_next(char *start, char *end, struct Vec0Token *out) {
- char *ptr = start;
- while (ptr < end) {
- char curr = *ptr;
- if (is_whitespace(curr)) {
- ptr++;
- continue;
- } else if (curr == '+') {
- ptr++;
- out->start = ptr;
- out->end = ptr;
- out->token_type = TOKEN_TYPE_PLUS;
- return VEC0_TOKEN_RESULT_SOME;
- } else if (curr == '[') {
- ptr++;
- out->start = ptr;
- out->end = ptr;
- out->token_type = TOKEN_TYPE_LBRACKET;
- return VEC0_TOKEN_RESULT_SOME;
- } else if (curr == ']') {
- ptr++;
- out->start = ptr;
- out->end = ptr;
- out->token_type = TOKEN_TYPE_RBRACKET;
- return VEC0_TOKEN_RESULT_SOME;
- } else if (curr == '=') {
- ptr++;
- out->start = ptr;
- out->end = ptr;
- out->token_type = TOKEN_TYPE_EQ;
- return VEC0_TOKEN_RESULT_SOME;
- } else if (is_alpha(curr)) {
- char *start = ptr;
- while (ptr < end && (is_alpha(*ptr) || is_digit(*ptr) || *ptr == '_')) {
- ptr++;
- }
- out->start = start;
- out->end = ptr;
- out->token_type = TOKEN_TYPE_IDENTIFIER;
- return VEC0_TOKEN_RESULT_SOME;
- } else if (is_digit(curr)) {
- char *start = ptr;
- while (ptr < end && (is_digit(*ptr))) {
- ptr++;
- }
- out->start = start;
- out->end = ptr;
- out->token_type = TOKEN_TYPE_DIGIT;
- return VEC0_TOKEN_RESULT_SOME;
- } else {
- return VEC0_TOKEN_RESULT_ERROR;
- }
- }
- return VEC0_TOKEN_RESULT_EOF;
- }
/**
 * Cursor over a source string for vec0_scanner_next(). All pointers refer to
 * the caller's source text; nothing is copied.
 */
struct Vec0Scanner {
  // Where the next token search begins; advanced by vec0_scanner_next()
  char *start;
  // One past the last character of the source
  char *end;
  // Set to the beginning of the source by vec0_scanner_init()
  char *ptr;
};
- void vec0_scanner_init(struct Vec0Scanner *scanner, const char *source,
- int source_length) {
- scanner->start = (char *)source;
- scanner->end = (char *)source + source_length;
- scanner->ptr = (char *)source;
- }
- int vec0_scanner_next(struct Vec0Scanner *scanner, struct Vec0Token *out) {
- int rc = vec0_token_next(scanner->start, scanner->end, out);
- if (rc == VEC0_TOKEN_RESULT_SOME) {
- scanner->start = out->end;
- }
- return rc;
- }
- int vec0_parse_table_option(const char *source, int source_length,
- char **out_key, int *out_key_length,
- char **out_value, int *out_value_length) {
- int rc;
- struct Vec0Scanner scanner;
- struct Vec0Token token;
- char *key;
- char *value;
- int keyLength, valueLength;
- vec0_scanner_init(&scanner, source, source_length);
- rc = vec0_scanner_next(&scanner, &token);
- if (rc != VEC0_TOKEN_RESULT_SOME &&
- token.token_type != TOKEN_TYPE_IDENTIFIER) {
- return SQLITE_EMPTY;
- }
- key = token.start;
- keyLength = token.end - token.start;
- rc = vec0_scanner_next(&scanner, &token);
- if (rc != VEC0_TOKEN_RESULT_SOME && token.token_type != TOKEN_TYPE_EQ) {
- return SQLITE_EMPTY;
- }
- rc = vec0_scanner_next(&scanner, &token);
- if (rc != VEC0_TOKEN_RESULT_SOME &&
- !((token.token_type == TOKEN_TYPE_IDENTIFIER) ||
- (token.token_type == TOKEN_TYPE_DIGIT))) {
- return SQLITE_ERROR;
- }
- value = token.start;
- valueLength = token.end - token.start;
- rc = vec0_scanner_next(&scanner, &token);
- if (rc == VEC0_TOKEN_RESULT_EOF) {
- *out_key = key;
- *out_key_length = keyLength;
- *out_value = value;
- *out_value_length = valueLength;
- return SQLITE_OK;
- }
- return SQLITE_ERROR;
- }
- /**
- * @brief Parse an argv[i] entry of a vec0 virtual table definition, and see if
- * it's a PARTITION KEY definition.
- *
- * @param source: argv[i] source string
- * @param source_length: length of the source string
- * @param out_column_name: If it is a partition key, the output column name. Same lifetime
- * as source, points to specific char *
- * @param out_column_name_length: Length of out_column_name in bytes
- * @param out_column_type: SQLITE_TEXT or SQLITE_INTEGER.
- * @return int: SQLITE_EMPTY if not a PK, SQLITE_OK if it is.
- */
- int vec0_parse_partition_key_definition(const char *source, int source_length,
- char **out_column_name,
- int *out_column_name_length,
- int *out_column_type) {
- struct Vec0Scanner scanner;
- struct Vec0Token token;
- char *column_name;
- int column_name_length;
- int column_type;
- vec0_scanner_init(&scanner, source, source_length);
- // Check first token is identifier, will be the column name
- int rc = vec0_scanner_next(&scanner, &token);
- if (rc != VEC0_TOKEN_RESULT_SOME &&
- token.token_type != TOKEN_TYPE_IDENTIFIER) {
- return SQLITE_EMPTY;
- }
- column_name = token.start;
- column_name_length = token.end - token.start;
- // Check the next token matches "text" or "integer", as column type
- rc = vec0_scanner_next(&scanner, &token);
- if (rc != VEC0_TOKEN_RESULT_SOME &&
- token.token_type != TOKEN_TYPE_IDENTIFIER) {
- return SQLITE_EMPTY;
- }
- if (sqlite3_strnicmp(token.start, "text", token.end - token.start) == 0) {
- column_type = SQLITE_TEXT;
- } else if (sqlite3_strnicmp(token.start, "int", token.end - token.start) ==
- 0 ||
- sqlite3_strnicmp(token.start, "integer",
- token.end - token.start) == 0) {
- column_type = SQLITE_INTEGER;
- } else {
- return SQLITE_EMPTY;
- }
- // Check the next token is identifier and matches "partition"
- rc = vec0_scanner_next(&scanner, &token);
- if (rc != VEC0_TOKEN_RESULT_SOME &&
- token.token_type != TOKEN_TYPE_IDENTIFIER) {
- return SQLITE_EMPTY;
- }
- if (sqlite3_strnicmp(token.start, "partition", token.end - token.start) != 0) {
- return SQLITE_EMPTY;
- }
- // Check the next token is identifier and matches "key"
- rc = vec0_scanner_next(&scanner, &token);
- if (rc != VEC0_TOKEN_RESULT_SOME &&
- token.token_type != TOKEN_TYPE_IDENTIFIER) {
- return SQLITE_EMPTY;
- }
- if (sqlite3_strnicmp(token.start, "key", token.end - token.start) != 0) {
- return SQLITE_EMPTY;
- }
- *out_column_name = column_name;
- *out_column_name_length = column_name_length;
- *out_column_type = column_type;
- return SQLITE_OK;
- }
- /**
- * @brief Parse an argv[i] entry of a vec0 virtual table definition, and see if
- * it's an auxiliar column definition, ie `+[name] [type]` like `+contents text`
- *
- * @param source: argv[i] source string
- * @param source_length: length of the source string
- * @param out_column_name: If it is a partition key, the output column name. Same lifetime
- * as source, points to specific char *
- * @param out_column_name_length: Length of out_column_name in bytes
- * @param out_column_type: SQLITE_TEXT, SQLITE_INTEGER, SQLITE_FLOAT, or SQLITE_BLOB.
- * @return int: SQLITE_EMPTY if not an aux column, SQLITE_OK if it is.
- */
- int vec0_parse_auxiliary_column_definition(const char *source, int source_length,
- char **out_column_name,
- int *out_column_name_length,
- int *out_column_type) {
- struct Vec0Scanner scanner;
- struct Vec0Token token;
- char *column_name;
- int column_name_length;
- int column_type;
- vec0_scanner_init(&scanner, source, source_length);
- // Check first token is '+', which denotes aux columns
- int rc = vec0_scanner_next(&scanner, &token);
- if (rc != VEC0_TOKEN_RESULT_SOME ||
- token.token_type != TOKEN_TYPE_PLUS) {
- return SQLITE_EMPTY;
- }
- rc = vec0_scanner_next(&scanner, &token);
- if (rc != VEC0_TOKEN_RESULT_SOME &&
- token.token_type != TOKEN_TYPE_IDENTIFIER) {
- return SQLITE_EMPTY;
- }
- column_name = token.start;
- column_name_length = token.end - token.start;
- // Check the next token matches "text" or "integer", as column type
- rc = vec0_scanner_next(&scanner, &token);
- if (rc != VEC0_TOKEN_RESULT_SOME &&
- token.token_type != TOKEN_TYPE_IDENTIFIER) {
- return SQLITE_EMPTY;
- }
- if (sqlite3_strnicmp(token.start, "text", token.end - token.start) == 0) {
- column_type = SQLITE_TEXT;
- } else if (sqlite3_strnicmp(token.start, "int", token.end - token.start) ==
- 0 ||
- sqlite3_strnicmp(token.start, "integer",
- token.end - token.start) == 0) {
- column_type = SQLITE_INTEGER;
- } else if (sqlite3_strnicmp(token.start, "float", token.end - token.start) ==
- 0 ||
- sqlite3_strnicmp(token.start, "double",
- token.end - token.start) == 0) {
- column_type = SQLITE_FLOAT;
- } else if (sqlite3_strnicmp(token.start, "blob", token.end - token.start) ==0) {
- column_type = SQLITE_BLOB;
- } else {
- return SQLITE_EMPTY;
- }
- *out_column_name = column_name;
- *out_column_name_length = column_name_length;
- *out_column_type = column_type;
- return SQLITE_OK;
- }
// Value kinds a vec0 metadata column can be declared with.
typedef enum {
  // Declared as "boolean" or "bool"
  VEC0_METADATA_COLUMN_KIND_BOOLEAN,
  // Declared as "int64", "integer64", "integer" or "int"
  VEC0_METADATA_COLUMN_KIND_INTEGER,
  // Declared as "float", "double", "float64" or "f64"
  VEC0_METADATA_COLUMN_KIND_FLOAT,
  // Declared as "text"
  VEC0_METADATA_COLUMN_KIND_TEXT,
  // future: blob, date, datetime
} vec0_metadata_column_kind;
- /**
- * @brief Parse an argv[i] entry of a vec0 virtual table definition, and see if
- * it's an metadata column definition, ie `[name] [type]` like `is_released boolean`
- *
- * @param source: argv[i] source string
- * @param source_length: length of the source string
- * @param out_column_name: If it is a metadata column, the output column name. Same lifetime
- * as source, points to specific char *
- * @param out_column_name_length: Length of out_column_name in bytes
- * @param out_column_type: one of vec0_metadata_column_kind
- * @return int: SQLITE_EMPTY if not an metadata column, SQLITE_OK if it is.
- */
- int vec0_parse_metadata_column_definition(const char *source, int source_length,
- char **out_column_name,
- int *out_column_name_length,
- vec0_metadata_column_kind *out_column_type) {
- struct Vec0Scanner scanner;
- struct Vec0Token token;
- char *column_name;
- int column_name_length;
- vec0_metadata_column_kind column_type;
- int rc;
- vec0_scanner_init(&scanner, source, source_length);
- rc = vec0_scanner_next(&scanner, &token);
- if (rc != VEC0_TOKEN_RESULT_SOME ||
- token.token_type != TOKEN_TYPE_IDENTIFIER) {
- return SQLITE_EMPTY;
- }
- column_name = token.start;
- column_name_length = token.end - token.start;
- // Check the next token matches a valid metadata type
- rc = vec0_scanner_next(&scanner, &token);
- if (rc != VEC0_TOKEN_RESULT_SOME ||
- token.token_type != TOKEN_TYPE_IDENTIFIER) {
- return SQLITE_EMPTY;
- }
- char * t = token.start;
- int n = token.end - token.start;
- if (sqlite3_strnicmp(t, "boolean", n) == 0 || sqlite3_strnicmp(t, "bool", n) == 0) {
- column_type = VEC0_METADATA_COLUMN_KIND_BOOLEAN;
- }else if (sqlite3_strnicmp(t, "int64", n) == 0 || sqlite3_strnicmp(t, "integer64", n) == 0 || sqlite3_strnicmp(t, "integer", n) == 0 || sqlite3_strnicmp(t, "int", n) == 0) {
- column_type = VEC0_METADATA_COLUMN_KIND_INTEGER;
- }else if (sqlite3_strnicmp(t, "float", n) == 0 || sqlite3_strnicmp(t, "double", n) == 0 || sqlite3_strnicmp(t, "float64", n) == 0 || sqlite3_strnicmp(t, "f64", n) == 0) {
- column_type = VEC0_METADATA_COLUMN_KIND_FLOAT;
- } else if (sqlite3_strnicmp(t, "text", n) == 0) {
- column_type = VEC0_METADATA_COLUMN_KIND_TEXT;
- } else {
- return SQLITE_EMPTY;
- }
- *out_column_name = column_name;
- *out_column_name_length = column_name_length;
- *out_column_type = column_type;
- return SQLITE_OK;
- }
- /**
- * @brief Parse an argv[i] entry of a vec0 virtual table definition, and see if
- * it's a PRIMARY KEY definition.
- *
- * @param source: argv[i] source string
- * @param source_length: length of the source string
- * @param out_column_name: If it is a PK, the output column name. Same lifetime
- * as source, points to specific char *
- * @param out_column_name_length: Length of out_column_name in bytes
- * @param out_column_type: SQLITE_TEXT or SQLITE_INTEGER.
- * @return int: SQLITE_EMPTY if not a PK, SQLITE_OK if it is.
- */
- int vec0_parse_primary_key_definition(const char *source, int source_length,
- char **out_column_name,
- int *out_column_name_length,
- int *out_column_type) {
- struct Vec0Scanner scanner;
- struct Vec0Token token;
- char *column_name;
- int column_name_length;
- int column_type;
- vec0_scanner_init(&scanner, source, source_length);
- // Check first token is identifier, will be the column name
- int rc = vec0_scanner_next(&scanner, &token);
- if (rc != VEC0_TOKEN_RESULT_SOME &&
- token.token_type != TOKEN_TYPE_IDENTIFIER) {
- return SQLITE_EMPTY;
- }
- column_name = token.start;
- column_name_length = token.end - token.start;
- // Check the next token matches "text" or "integer", as column type
- rc = vec0_scanner_next(&scanner, &token);
- if (rc != VEC0_TOKEN_RESULT_SOME &&
- token.token_type != TOKEN_TYPE_IDENTIFIER) {
- return SQLITE_EMPTY;
- }
- if (sqlite3_strnicmp(token.start, "text", token.end - token.start) == 0) {
- column_type = SQLITE_TEXT;
- } else if (sqlite3_strnicmp(token.start, "int", token.end - token.start) ==
- 0 ||
- sqlite3_strnicmp(token.start, "integer",
- token.end - token.start) == 0) {
- column_type = SQLITE_INTEGER;
- } else {
- return SQLITE_EMPTY;
- }
- // Check the next token is identifier and matches "primary"
- rc = vec0_scanner_next(&scanner, &token);
- if (rc != VEC0_TOKEN_RESULT_SOME &&
- token.token_type != TOKEN_TYPE_IDENTIFIER) {
- return SQLITE_EMPTY;
- }
- if (sqlite3_strnicmp(token.start, "primary", token.end - token.start) != 0) {
- return SQLITE_EMPTY;
- }
- // Check the next token is identifier and matches "key"
- rc = vec0_scanner_next(&scanner, &token);
- if (rc != VEC0_TOKEN_RESULT_SOME &&
- token.token_type != TOKEN_TYPE_IDENTIFIER) {
- return SQLITE_EMPTY;
- }
- if (sqlite3_strnicmp(token.start, "key", token.end - token.start) != 0) {
- return SQLITE_EMPTY;
- }
- *out_column_name = column_name;
- *out_column_name_length = column_name_length;
- *out_column_type = column_type;
- return SQLITE_OK;
- }
/**
 * Distance metrics a vec0 vector column can be declared with through the
 * `distance_metric=` column option (`l2`, `cosine` or `l1`).
 */
enum Vec0DistanceMetrics {
  // `distance_metric=l2`; also what the column parser starts from when the
  // option is absent.
  VEC0_DISTANCE_METRIC_L2 = 1,
  // `distance_metric=cosine`
  VEC0_DISTANCE_METRIC_COSINE = 2,
  // `distance_metric=l1`
  VEC0_DISTANCE_METRIC_L1 = 3,
};
- struct VectorColumnDefinition {
- char *name;
- int name_length;
- size_t dimensions;
- enum VectorElementType element_type;
- enum Vec0DistanceMetrics distance_metric;
- };
/**
 * Description of a partition column of a vec0 table.
 */
struct Vec0PartitionColumnDefinition {
  // Column type code. NOTE(review): presumably an SQLITE_* datatype constant;
  // confirm against the code that fills this in.
  int type;
  // Column name.
  char *name;
  // Length of `name` in bytes.
  int name_length;
};
/**
 * Description of an auxiliary column of a vec0 table.
 */
struct Vec0AuxiliaryColumnDefinition {
  // Column type code. NOTE(review): presumably an SQLITE_* datatype constant;
  // confirm against the code that fills this in.
  int type;
  // Column name.
  char *name;
  // Length of `name` in bytes.
  int name_length;
};
- struct Vec0MetadataColumnDefinition {
- vec0_metadata_column_kind kind;
- char * name;
- int name_length;
- };
- size_t vector_byte_size(enum VectorElementType element_type,
- size_t dimensions) {
- switch (element_type) {
- case SQLITE_VEC_ELEMENT_TYPE_FLOAT32:
- return dimensions * sizeof(f32);
- case SQLITE_VEC_ELEMENT_TYPE_INT8:
- return dimensions * sizeof(i8);
- case SQLITE_VEC_ELEMENT_TYPE_BIT:
- return dimensions / CHAR_BIT;
- }
- return 0;
- }
- size_t vector_column_byte_size(struct VectorColumnDefinition column) {
- return vector_byte_size(column.element_type, column.dimensions);
- }
- /**
- * @brief Parse an vec0 vtab argv[i] column definition and see if
- * it's a vector column defintion, ex `contents_embedding float[768]`.
- *
- * @param source vec0 argv[i] item
- * @param source_length length of source in bytes
- * @param outColumn Output the parse vector column to this struct, if success
- * @return int SQLITE_OK on success, SQLITE_EMPTY is it's not a vector column
- * definition, SQLITE_ERROR on error.
- */
- int vec0_parse_vector_column(const char *source, int source_length,
- struct VectorColumnDefinition *outColumn) {
- // parses a vector column definition like so:
- // "abc float[123]", "abc_123 bit[1234]", eetc.
- // https://github.com/asg017/sqlite-vec/issues/46
- int rc;
- struct Vec0Scanner scanner;
- struct Vec0Token token;
- char *name;
- int nameLength;
- enum VectorElementType elementType;
- enum Vec0DistanceMetrics distanceMetric = VEC0_DISTANCE_METRIC_L2;
- int dimensions;
- vec0_scanner_init(&scanner, source, source_length);
- // starts with an identifier
- rc = vec0_scanner_next(&scanner, &token);
- if (rc != VEC0_TOKEN_RESULT_SOME &&
- token.token_type != TOKEN_TYPE_IDENTIFIER) {
- return SQLITE_EMPTY;
- }
- name = token.start;
- nameLength = token.end - token.start;
- // vector column type comes next: float, int, or bit
- rc = vec0_scanner_next(&scanner, &token);
- if (rc != VEC0_TOKEN_RESULT_SOME ||
- token.token_type != TOKEN_TYPE_IDENTIFIER) {
- return SQLITE_EMPTY;
- }
- if (sqlite3_strnicmp(token.start, "float", 5) == 0 ||
- sqlite3_strnicmp(token.start, "f32", 3) == 0) {
- elementType = SQLITE_VEC_ELEMENT_TYPE_FLOAT32;
- } else if (sqlite3_strnicmp(token.start, "int8", 4) == 0 ||
- sqlite3_strnicmp(token.start, "i8", 2) == 0) {
- elementType = SQLITE_VEC_ELEMENT_TYPE_INT8;
- } else if (sqlite3_strnicmp(token.start, "bit", 3) == 0) {
- elementType = SQLITE_VEC_ELEMENT_TYPE_BIT;
- } else {
- return SQLITE_EMPTY;
- }
- // left '[' bracket
- rc = vec0_scanner_next(&scanner, &token);
- if (rc != VEC0_TOKEN_RESULT_SOME && token.token_type != TOKEN_TYPE_LBRACKET) {
- return SQLITE_EMPTY;
- }
- // digit, for vector dimension length
- rc = vec0_scanner_next(&scanner, &token);
- if (rc != VEC0_TOKEN_RESULT_SOME && token.token_type != TOKEN_TYPE_DIGIT) {
- return SQLITE_ERROR;
- }
- dimensions = atoi(token.start);
- if (dimensions <= 0) {
- return SQLITE_ERROR;
- }
- // // right ']' bracket
- rc = vec0_scanner_next(&scanner, &token);
- if (rc != VEC0_TOKEN_RESULT_SOME && token.token_type != TOKEN_TYPE_RBRACKET) {
- return SQLITE_ERROR;
- }
- // any other tokens left should be column-level options , ex `key=value`
- // ex `distance_metric=L2 distance_metric=cosine` should error
- while (1) {
- // should be EOF or identifier (option key)
- rc = vec0_scanner_next(&scanner, &token);
- if (rc == VEC0_TOKEN_RESULT_EOF) {
- break;
- }
- if (rc != VEC0_TOKEN_RESULT_SOME &&
- token.token_type != TOKEN_TYPE_IDENTIFIER) {
- return SQLITE_ERROR;
- }
- char *key = token.start;
- int keyLength = token.end - token.start;
- if (sqlite3_strnicmp(key, "distance_metric", keyLength) == 0) {
- if (elementType == SQLITE_VEC_ELEMENT_TYPE_BIT) {
- return SQLITE_ERROR;
- }
- // ensure equal sign after distance_metric
- rc = vec0_scanner_next(&scanner, &token);
- if (rc != VEC0_TOKEN_RESULT_SOME && token.token_type != TOKEN_TYPE_EQ) {
- return SQLITE_ERROR;
- }
- // distance_metric value, an identifier (L2, cosine, etc)
- rc = vec0_scanner_next(&scanner, &token);
- if (rc != VEC0_TOKEN_RESULT_SOME &&
- token.token_type != TOKEN_TYPE_IDENTIFIER) {
- return SQLITE_ERROR;
- }
- char *value = token.start;
- int valueLength = token.end - token.start;
- if (sqlite3_strnicmp(value, "l2", valueLength) == 0) {
- distanceMetric = VEC0_DISTANCE_METRIC_L2;
- } else if (sqlite3_strnicmp(value, "l1", valueLength) == 0) {
- distanceMetric = VEC0_DISTANCE_METRIC_L1;
- } else if (sqlite3_strnicmp(value, "cosine", valueLength) == 0) {
- distanceMetric = VEC0_DISTANCE_METRIC_COSINE;
- } else {
- return SQLITE_ERROR;
- }
- }
- // unknown key
- else {
- return SQLITE_ERROR;
- }
- }
- outColumn->name = sqlite3_mprintf("%.*s", nameLength, name);
- if (!outColumn->name) {
- return SQLITE_ERROR;
- }
- outColumn->name_length = nameLength;
- outColumn->distance_metric = distanceMetric;
- outColumn->element_type = elementType;
- outColumn->dimensions = dimensions;
- return SQLITE_OK;
- }
- #pragma region vec_each table function
- typedef struct vec_each_vtab vec_each_vtab;
- struct vec_each_vtab {
- sqlite3_vtab base;
- };
- typedef struct vec_each_cursor vec_each_cursor;
- struct vec_each_cursor {
- sqlite3_vtab_cursor base;
- i64 iRowid;
- enum VectorElementType vector_type;
- void *vector;
- size_t dimensions;
- vector_cleanup cleanup;
- };
- static int vec_eachConnect(sqlite3 *db, void *pAux, int argc,
- const char *const *argv, sqlite3_vtab **ppVtab,
- char **pzErr) {
- UNUSED_PARAMETER(pAux);
- UNUSED_PARAMETER(argc);
- UNUSED_PARAMETER(argv);
- UNUSED_PARAMETER(pzErr);
- vec_each_vtab *pNew;
- int rc;
- rc = sqlite3_declare_vtab(db, "CREATE TABLE x(value, vector hidden)");
- #define VEC_EACH_COLUMN_VALUE 0
- #define VEC_EACH_COLUMN_VECTOR 1
- if (rc == SQLITE_OK) {
- pNew = sqlite3_malloc(sizeof(*pNew));
- *ppVtab = (sqlite3_vtab *)pNew;
- if (pNew == 0)
- return SQLITE_NOMEM;
- memset(pNew, 0, sizeof(*pNew));
- }
- return rc;
- }
- static int vec_eachDisconnect(sqlite3_vtab *pVtab) {
- vec_each_vtab *p = (vec_each_vtab *)pVtab;
- sqlite3_free(p);
- return SQLITE_OK;
- }
- static int vec_eachOpen(sqlite3_vtab *p, sqlite3_vtab_cursor **ppCursor) {
- UNUSED_PARAMETER(p);
- vec_each_cursor *pCur;
- pCur = sqlite3_malloc(sizeof(*pCur));
- if (pCur == 0)
- return SQLITE_NOMEM;
- memset(pCur, 0, sizeof(*pCur));
- *ppCursor = &pCur->base;
- return SQLITE_OK;
- }
- static int vec_eachClose(sqlite3_vtab_cursor *cur) {
- vec_each_cursor *pCur = (vec_each_cursor *)cur;
- if(pCur->vector) {
- pCur->cleanup(pCur->vector);
- }
- sqlite3_free(pCur);
- return SQLITE_OK;
- }
- static int vec_eachBestIndex(sqlite3_vtab *pVTab,
- sqlite3_index_info *pIdxInfo) {
- UNUSED_PARAMETER(pVTab);
- int hasVector = 0;
- for (int i = 0; i < pIdxInfo->nConstraint; i++) {
- const struct sqlite3_index_constraint *pCons = &pIdxInfo->aConstraint[i];
- // printf("i=%d iColumn=%d, op=%d, usable=%d\n", i, pCons->iColumn,
- // pCons->op, pCons->usable);
- switch (pCons->iColumn) {
- case VEC_EACH_COLUMN_VECTOR: {
- if (pCons->op == SQLITE_INDEX_CONSTRAINT_EQ && pCons->usable) {
- hasVector = 1;
- pIdxInfo->aConstraintUsage[i].argvIndex = 1;
- pIdxInfo->aConstraintUsage[i].omit = 1;
- }
- break;
- }
- }
- }
- if (!hasVector) {
- return SQLITE_CONSTRAINT;
- }
- pIdxInfo->estimatedCost = (double)100000;
- pIdxInfo->estimatedRows = 100000;
- return SQLITE_OK;
- }
- static int vec_eachFilter(sqlite3_vtab_cursor *pVtabCursor, int idxNum,
- const char *idxStr, int argc, sqlite3_value **argv) {
- UNUSED_PARAMETER(idxNum);
- UNUSED_PARAMETER(idxStr);
- assert(argc == 1);
- vec_each_cursor *pCur = (vec_each_cursor *)pVtabCursor;
- if (pCur->vector) {
- pCur->cleanup(pCur->vector);
- pCur->vector = NULL;
- }
- char *pzErrMsg;
- int rc = vector_from_value(argv[0], &pCur->vector, &pCur->dimensions,
- &pCur->vector_type, &pCur->cleanup, &pzErrMsg);
- if (rc != SQLITE_OK) {
- return SQLITE_ERROR;
- }
- pCur->iRowid = 0;
- return SQLITE_OK;
- }
- static int vec_eachRowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid) {
- vec_each_cursor *pCur = (vec_each_cursor *)cur;
- *pRowid = pCur->iRowid;
- return SQLITE_OK;
- }
- static int vec_eachEof(sqlite3_vtab_cursor *cur) {
- vec_each_cursor *pCur = (vec_each_cursor *)cur;
- return pCur->iRowid >= (i64)pCur->dimensions;
- }
- static int vec_eachNext(sqlite3_vtab_cursor *cur) {
- vec_each_cursor *pCur = (vec_each_cursor *)cur;
- pCur->iRowid++;
- return SQLITE_OK;
- }
- static int vec_eachColumn(sqlite3_vtab_cursor *cur, sqlite3_context *context,
- int i) {
- vec_each_cursor *pCur = (vec_each_cursor *)cur;
- switch (i) {
- case VEC_EACH_COLUMN_VALUE:
- switch (pCur->vector_type) {
- case SQLITE_VEC_ELEMENT_TYPE_FLOAT32: {
- sqlite3_result_double(context, ((f32 *)pCur->vector)[pCur->iRowid]);
- break;
- }
- case SQLITE_VEC_ELEMENT_TYPE_BIT: {
- u8 x = ((u8 *)pCur->vector)[pCur->iRowid / CHAR_BIT];
- sqlite3_result_int(context,
- (x & (0b10000000 >> ((pCur->iRowid % CHAR_BIT)))) > 0);
- break;
- }
- case SQLITE_VEC_ELEMENT_TYPE_INT8: {
- sqlite3_result_int(context, ((i8 *)pCur->vector)[pCur->iRowid]);
- break;
- }
- }
- break;
- }
- return SQLITE_OK;
- }
- static sqlite3_module vec_eachModule = {
- /* iVersion */ 0,
- /* xCreate */ 0,
- /* xConnect */ vec_eachConnect,
- /* xBestIndex */ vec_eachBestIndex,
- /* xDisconnect */ vec_eachDisconnect,
- /* xDestroy */ 0,
- /* xOpen */ vec_eachOpen,
- /* xClose */ vec_eachClose,
- /* xFilter */ vec_eachFilter,
- /* xNext */ vec_eachNext,
- /* xEof */ vec_eachEof,
- /* xColumn */ vec_eachColumn,
- /* xRowid */ vec_eachRowid,
- /* xUpdate */ 0,
- /* xBegin */ 0,
- /* xSync */ 0,
- /* xCommit */ 0,
- /* xRollback */ 0,
- /* xFindMethod */ 0,
- /* xRename */ 0,
- /* xSavepoint */ 0,
- /* xRelease */ 0,
- /* xRollbackTo */ 0,
- /* xShadowName */ 0,
- #if SQLITE_VERSION_NUMBER >= 3044000
- /* xIntegrity */ 0
- #endif
- };
- #pragma endregion
- #pragma region vec_npy_each table function
/**
 * Kinds of tokens found in the dictionary-like header of a numpy `.npy` array.
 */
enum NpyTokenType {
  NPY_TOKEN_TYPE_IDENTIFIER,
  // run of decimal digits
  NPY_TOKEN_TYPE_NUMBER,
  // '('
  NPY_TOKEN_TYPE_LPAREN,
  // ')'
  NPY_TOKEN_TYPE_RPAREN,
  // '{'
  NPY_TOKEN_TYPE_LBRACE,
  // '}'
  NPY_TOKEN_TYPE_RBRACE,
  // ':'
  NPY_TOKEN_TYPE_COLON,
  // ','
  NPY_TOKEN_TYPE_COMMA,
  // single-quoted string, quotes included
  NPY_TOKEN_TYPE_STRING,
  // the literal `False`
  NPY_TOKEN_TYPE_FALSE,
};
- struct NpyToken {
- enum NpyTokenType token_type;
- unsigned char *start;
- unsigned char *end;
- };
- int npy_token_next(unsigned char *start, unsigned char *end,
- struct NpyToken *out) {
- unsigned char *ptr = start;
- while (ptr < end) {
- unsigned char curr = *ptr;
- if (is_whitespace(curr)) {
- ptr++;
- continue;
- } else if (curr == '(') {
- out->start = ptr++;
- out->end = ptr;
- out->token_type = NPY_TOKEN_TYPE_LPAREN;
- return VEC0_TOKEN_RESULT_SOME;
- } else if (curr == ')') {
- out->start = ptr++;
- out->end = ptr;
- out->token_type = NPY_TOKEN_TYPE_RPAREN;
- return VEC0_TOKEN_RESULT_SOME;
- } else if (curr == '{') {
- out->start = ptr++;
- out->end = ptr;
- out->token_type = NPY_TOKEN_TYPE_LBRACE;
- return VEC0_TOKEN_RESULT_SOME;
- } else if (curr == '}') {
- out->start = ptr++;
- out->end = ptr;
- out->token_type = NPY_TOKEN_TYPE_RBRACE;
- return VEC0_TOKEN_RESULT_SOME;
- } else if (curr == ':') {
- out->start = ptr++;
- out->end = ptr;
- out->token_type = NPY_TOKEN_TYPE_COLON;
- return VEC0_TOKEN_RESULT_SOME;
- } else if (curr == ',') {
- out->start = ptr++;
- out->end = ptr;
- out->token_type = NPY_TOKEN_TYPE_COMMA;
- return VEC0_TOKEN_RESULT_SOME;
- } else if (curr == '\'') {
- unsigned char *start = ptr;
- ptr++;
- while (ptr < end) {
- if ((*ptr) == '\'') {
- break;
- }
- ptr++;
- }
- if ((*ptr) != '\'') {
- return VEC0_TOKEN_RESULT_ERROR;
- }
- out->start = start;
- out->end = ++ptr;
- out->token_type = NPY_TOKEN_TYPE_STRING;
- return VEC0_TOKEN_RESULT_SOME;
- } else if (curr == 'F' &&
- strncmp((char *)ptr, "False", strlen("False")) == 0) {
- out->start = ptr;
- out->end = (ptr + (int)strlen("False"));
- ptr = out->end;
- out->token_type = NPY_TOKEN_TYPE_FALSE;
- return VEC0_TOKEN_RESULT_SOME;
- } else if (is_digit(curr)) {
- unsigned char *start = ptr;
- while (ptr < end && (is_digit(*ptr))) {
- ptr++;
- }
- out->start = start;
- out->end = ptr;
- out->token_type = NPY_TOKEN_TYPE_NUMBER;
- return VEC0_TOKEN_RESULT_SOME;
- } else {
- return VEC0_TOKEN_RESULT_ERROR;
- }
- }
- return VEC0_TOKEN_RESULT_ERROR;
- }
/**
 * Scanning state over a numpy header buffer.
 */
struct NpyScanner {
  // Where the next token scan begins; npy_scanner_next() moves it forward.
  unsigned char *start;
  // One past the last byte of the buffer.
  unsigned char *end;
  // Set to the beginning of the buffer by npy_scanner_init().
  unsigned char *ptr;
};
- void npy_scanner_init(struct NpyScanner *scanner, const unsigned char *source,
- int source_length) {
- scanner->start = (unsigned char *)source;
- scanner->end = (unsigned char *)source + source_length;
- scanner->ptr = (unsigned char *)source;
- }
- int npy_scanner_next(struct NpyScanner *scanner, struct NpyToken *out) {
- int rc = npy_token_next(scanner->start, scanner->end, out);
- if (rc == VEC0_TOKEN_RESULT_SOME) {
- scanner->start = out->end;
- }
- return rc;
- }
- #define NPY_PARSE_ERROR "Error parsing numpy array: "
- int parse_npy_header(sqlite3_vtab *pVTab, const unsigned char *header,
- size_t headerLength,
- enum VectorElementType *out_element_type,
- int *fortran_order, size_t *numElements,
- size_t *numDimensions) {
- struct NpyScanner scanner;
- struct NpyToken token;
- int rc;
- npy_scanner_init(&scanner, header, headerLength);
- if (npy_scanner_next(&scanner, &token) != VEC0_TOKEN_RESULT_SOME &&
- token.token_type != NPY_TOKEN_TYPE_LBRACE) {
- vtab_set_error(pVTab,
- NPY_PARSE_ERROR "numpy header did not start with '{'");
- return SQLITE_ERROR;
- }
- while (1) {
- rc = npy_scanner_next(&scanner, &token);
- if (rc != VEC0_TOKEN_RESULT_SOME) {
- vtab_set_error(pVTab, NPY_PARSE_ERROR "expected key in numpy header");
- return SQLITE_ERROR;
- }
- if (token.token_type == NPY_TOKEN_TYPE_RBRACE) {
- break;
- }
- if (token.token_type != NPY_TOKEN_TYPE_STRING) {
- vtab_set_error(pVTab, NPY_PARSE_ERROR
- "expected a string as key in numpy header");
- return SQLITE_ERROR;
- }
- unsigned char *key = token.start;
- rc = npy_scanner_next(&scanner, &token);
- if ((rc != VEC0_TOKEN_RESULT_SOME) ||
- (token.token_type != NPY_TOKEN_TYPE_COLON)) {
- vtab_set_error(pVTab, NPY_PARSE_ERROR
- "expected a ':' after key in numpy header");
- return SQLITE_ERROR;
- }
- if (strncmp((char *)key, "'descr'", strlen("'descr'")) == 0) {
- rc = npy_scanner_next(&scanner, &token);
- if ((rc != VEC0_TOKEN_RESULT_SOME) ||
- (token.token_type != NPY_TOKEN_TYPE_STRING)) {
- vtab_set_error(pVTab, NPY_PARSE_ERROR
- "expected a string value after 'descr' key");
- return SQLITE_ERROR;
- }
- if (strncmp((char *)token.start, "'<f4'", strlen("'<f4'")) != 0) {
- vtab_set_error(
- pVTab, NPY_PARSE_ERROR
- "Only '<f4' values are supported in sqlite-vec numpy functions");
- return SQLITE_ERROR;
- }
- *out_element_type = SQLITE_VEC_ELEMENT_TYPE_FLOAT32;
- } else if (strncmp((char *)key, "'fortran_order'",
- strlen("'fortran_order'")) == 0) {
- rc = npy_scanner_next(&scanner, &token);
- if (rc != VEC0_TOKEN_RESULT_SOME ||
- token.token_type != NPY_TOKEN_TYPE_FALSE) {
- vtab_set_error(pVTab, NPY_PARSE_ERROR
- "Only fortran_order = False is supported in sqlite-vec "
- "numpy functions");
- return SQLITE_ERROR;
- }
- *fortran_order = 0;
- } else if (strncmp((char *)key, "'shape'", strlen("'shape'")) == 0) {
- // "(xxx, xxx)" OR (xxx,)
- size_t first;
- rc = npy_scanner_next(&scanner, &token);
- if ((rc != VEC0_TOKEN_RESULT_SOME) ||
- (token.token_type != NPY_TOKEN_TYPE_LPAREN)) {
- vtab_set_error(pVTab, NPY_PARSE_ERROR
- "Expected left parenthesis '(' after shape key");
- return SQLITE_ERROR;
- }
- rc = npy_scanner_next(&scanner, &token);
- if ((rc != VEC0_TOKEN_RESULT_SOME) ||
- (token.token_type != NPY_TOKEN_TYPE_NUMBER)) {
- vtab_set_error(pVTab, NPY_PARSE_ERROR
- "Expected an initial number in shape value");
- return SQLITE_ERROR;
- }
- first = strtol((char *)token.start, NULL, 10);
- rc = npy_scanner_next(&scanner, &token);
- if ((rc != VEC0_TOKEN_RESULT_SOME) ||
- (token.token_type != NPY_TOKEN_TYPE_COMMA)) {
- vtab_set_error(pVTab, NPY_PARSE_ERROR
- "Expected comma after first shape value");
- return SQLITE_ERROR;
- }
- rc = npy_scanner_next(&scanner, &token);
- if (rc != VEC0_TOKEN_RESULT_SOME) {
- vtab_set_error(pVTab, NPY_PARSE_ERROR
- "unexpected header EOF while parsing shape");
- return SQLITE_ERROR;
- }
- if (token.token_type == NPY_TOKEN_TYPE_NUMBER) {
- *numElements = first;
- *numDimensions = strtol((char *)token.start, NULL, 10);
- rc = npy_scanner_next(&scanner, &token);
- if ((rc != VEC0_TOKEN_RESULT_SOME) ||
- (token.token_type != NPY_TOKEN_TYPE_RPAREN)) {
- vtab_set_error(pVTab, NPY_PARSE_ERROR
- "expected right parenthesis after shape value");
- return SQLITE_ERROR;
- }
- } else if (token.token_type == NPY_TOKEN_TYPE_RPAREN) {
- // '(0,)' means an empty array!
- *numElements = first ? 1 : 0;
- *numDimensions = first;
- } else {
- vtab_set_error(pVTab, NPY_PARSE_ERROR "unknown type in shape value");
- return SQLITE_ERROR;
- }
- } else {
- vtab_set_error(pVTab, NPY_PARSE_ERROR "unknown key in numpy header");
- return SQLITE_ERROR;
- }
- rc = npy_scanner_next(&scanner, &token);
- if ((rc != VEC0_TOKEN_RESULT_SOME) ||
- (token.token_type != NPY_TOKEN_TYPE_COMMA)) {
- vtab_set_error(pVTab, NPY_PARSE_ERROR "unknown extra token after value");
- return SQLITE_ERROR;
- }
- }
- return SQLITE_OK;
- }
- typedef struct vec_npy_each_vtab vec_npy_each_vtab;
- struct vec_npy_each_vtab {
- sqlite3_vtab base;
- };
/**
 * Where a vec_npy_each cursor reads its numpy array from.
 */
typedef enum {
  // the array lives in an in-memory blob
  VEC_NPY_EACH_INPUT_BUFFER,
  // the array is streamed from an opened file
  VEC_NPY_EACH_INPUT_FILE,
} vec_npy_each_input_type;
- typedef struct vec_npy_each_cursor vec_npy_each_cursor;
- struct vec_npy_each_cursor {
- sqlite3_vtab_cursor base;
- i64 iRowid;
- // sqlite-vec compatible type of vector
- enum VectorElementType elementType;
- // number of vectors in the npy array
- size_t nElements;
- // number of dimensions each vector has
- size_t nDimensions;
- vec_npy_each_input_type input_type;
- // when input_type == VEC_NPY_EACH_INPUT_BUFFER
- // Buffer containing the vector data, when reading from an in-memory buffer.
- // Size: nElements * nDimensions * element_size
- // Clean up with sqlite3_free() once complete
- void *vector;
- // when input_type == VEC_NPY_EACH_INPUT_FILE
- // Opened npy file, when reading from a file.
- // fclose() when complete.
- #ifndef SQLITE_VEC_OMIT_FS
- FILE *file;
- #endif
- // an in-memory buffer containing a portion of the npy array.
- // Used for faster reading, instead of calling fread a lot.
- // Will have a byte-size of fileBufferSize
- void *chunksBuffer;
- // size of allocated fileBuffer in bytes
- size_t chunksBufferSize;
- //// Maximum length of the buffer, in terms of number of vectors.
- size_t maxChunks;
- // Counter index of the current vector into of fileBuffer to yield.
- // Starts at 0 once fileBuffer is read, and iterates to bufferLength.
- // Resets to 0 once that "buffer" is yielded and a new one is read.
- size_t currentChunkIndex;
- size_t currentChunkSize;
- // 0 when there are still more elements to read/yield, 1 when complete.
- int eof;
- };
// Magic bytes every numpy `.npy` payload starts with. Exactly 6 bytes: the
// array is sized so that no terminating NUL is stored. Read-only.
static const unsigned char NPY_MAGIC[6] = "\x93NUMPY";
- #ifndef SQLITE_VEC_OMIT_FS
- int parse_npy_file(sqlite3_vtab *pVTab, FILE *file, vec_npy_each_cursor *pCur) {
- int n;
- fseek(file, 0, SEEK_END);
- long fileSize = ftell(file);
- fseek(file, 0L, SEEK_SET);
- unsigned char header[10];
- n = fread(&header, sizeof(unsigned char), 10, file);
- if (n != 10) {
- vtab_set_error(pVTab, "numpy array file too short");
- return SQLITE_ERROR;
- }
- if (memcmp(NPY_MAGIC, header, sizeof(NPY_MAGIC)) != 0) {
- vtab_set_error(pVTab,
- "numpy array file does not contain the 'magic' header");
- return SQLITE_ERROR;
- }
- u8 major = header[6];
- u8 minor = header[7];
- uint16_t headerLength = 0;
- memcpy(&headerLength, &header[8], sizeof(uint16_t));
- size_t totalHeaderLength = sizeof(NPY_MAGIC) + sizeof(major) + sizeof(minor) +
- sizeof(headerLength) + headerLength;
- i32 dataSize = fileSize - totalHeaderLength;
- if (dataSize < 0) {
- vtab_set_error(pVTab, "numpy array file header length is invalid");
- return SQLITE_ERROR;
- }
- unsigned char *headerX = sqlite3_malloc(headerLength);
- if (headerLength && !headerX) {
- return SQLITE_NOMEM;
- }
- n = fread(headerX, sizeof(char), headerLength, file);
- if (n != headerLength) {
- sqlite3_free(headerX);
- vtab_set_error(pVTab, "numpy array file header length is invalid");
- return SQLITE_ERROR;
- }
- int fortran_order;
- enum VectorElementType element_type;
- size_t numElements;
- size_t numDimensions;
- int rc = parse_npy_header(pVTab, headerX, headerLength, &element_type,
- &fortran_order, &numElements, &numDimensions);
- sqlite3_free(headerX);
- if (rc != SQLITE_OK) {
- // parse_npy_header already attackes an error emssage
- return rc;
- }
- i32 expectedDataSize =
- numElements * vector_byte_size(element_type, numDimensions);
- if (expectedDataSize != dataSize) {
- vtab_set_error(
- pVTab, "numpy array file error: Expected a data size of %d, found %d",
- expectedDataSize, dataSize);
- return SQLITE_ERROR;
- }
- pCur->maxChunks = 1024;
- pCur->chunksBufferSize =
- (vector_byte_size(element_type, numDimensions)) * pCur->maxChunks;
- pCur->chunksBuffer = sqlite3_malloc(pCur->chunksBufferSize);
- if (pCur->chunksBufferSize && !pCur->chunksBuffer) {
- return SQLITE_NOMEM;
- }
- pCur->currentChunkSize =
- fread(pCur->chunksBuffer, vector_byte_size(element_type, numDimensions),
- pCur->maxChunks, file);
- pCur->currentChunkIndex = 0;
- pCur->elementType = element_type;
- pCur->nElements = numElements;
- pCur->nDimensions = numDimensions;
- pCur->input_type = VEC_NPY_EACH_INPUT_FILE;
- pCur->eof = pCur->currentChunkSize == 0;
- pCur->file = file;
- return SQLITE_OK;
- }
- #endif
- int parse_npy_buffer(sqlite3_vtab *pVTab, const unsigned char *buffer,
- int bufferLength, void **data, size_t *numElements,
- size_t *numDimensions,
- enum VectorElementType *element_type) {
- if (bufferLength < 10) {
- // IMP: V03312_20150
- vtab_set_error(pVTab, "numpy array too short");
- return SQLITE_ERROR;
- }
- if (memcmp(NPY_MAGIC, buffer, sizeof(NPY_MAGIC)) != 0) {
- // V11954_28792
- vtab_set_error(pVTab, "numpy array does not contain the 'magic' header");
- return SQLITE_ERROR;
- }
- u8 major = buffer[6];
- u8 minor = buffer[7];
- uint16_t headerLength = 0;
- memcpy(&headerLength, &buffer[8], sizeof(uint16_t));
- i32 totalHeaderLength = sizeof(NPY_MAGIC) + sizeof(major) + sizeof(minor) +
- sizeof(headerLength) + headerLength;
- i32 dataSize = bufferLength - totalHeaderLength;
- if (dataSize < 0) {
- vtab_set_error(pVTab, "numpy array header length is invalid");
- return SQLITE_ERROR;
- }
- const unsigned char *header = &buffer[10];
- int fortran_order;
- int rc = parse_npy_header(pVTab, header, headerLength, element_type,
- &fortran_order, numElements, numDimensions);
- if (rc != SQLITE_OK) {
- return rc;
- }
- i32 expectedDataSize =
- (*numElements * vector_byte_size(*element_type, *numDimensions));
- if (expectedDataSize != dataSize) {
- vtab_set_error(pVTab,
- "numpy array error: Expected a data size of %d, found %d",
- expectedDataSize, dataSize);
- return SQLITE_ERROR;
- }
- *data = (void *)&buffer[totalHeaderLength];
- return SQLITE_OK;
- }
- static int vec_npy_eachConnect(sqlite3 *db, void *pAux, int argc,
- const char *const *argv, sqlite3_vtab **ppVtab,
- char **pzErr) {
- UNUSED_PARAMETER(pAux);
- UNUSED_PARAMETER(argc);
- UNUSED_PARAMETER(argv);
- UNUSED_PARAMETER(pzErr);
- vec_npy_each_vtab *pNew;
- int rc;
- rc = sqlite3_declare_vtab(db, "CREATE TABLE x(vector, input hidden)");
- #define VEC_NPY_EACH_COLUMN_VECTOR 0
- #define VEC_NPY_EACH_COLUMN_INPUT 1
- if (rc == SQLITE_OK) {
- pNew = sqlite3_malloc(sizeof(*pNew));
- *ppVtab = (sqlite3_vtab *)pNew;
- if (pNew == 0)
- return SQLITE_NOMEM;
- memset(pNew, 0, sizeof(*pNew));
- }
- return rc;
- }
- static int vec_npy_eachDisconnect(sqlite3_vtab *pVtab) {
- vec_npy_each_vtab *p = (vec_npy_each_vtab *)pVtab;
- sqlite3_free(p);
- return SQLITE_OK;
- }
- static int vec_npy_eachOpen(sqlite3_vtab *p, sqlite3_vtab_cursor **ppCursor) {
- UNUSED_PARAMETER(p);
- vec_npy_each_cursor *pCur;
- pCur = sqlite3_malloc(sizeof(*pCur));
- if (pCur == 0)
- return SQLITE_NOMEM;
- memset(pCur, 0, sizeof(*pCur));
- *ppCursor = &pCur->base;
- return SQLITE_OK;
- }
- static int vec_npy_eachClose(sqlite3_vtab_cursor *cur) {
- vec_npy_each_cursor *pCur = (vec_npy_each_cursor *)cur;
- #ifndef SQLITE_VEC_OMIT_FS
- if (pCur->file) {
- fclose(pCur->file);
- pCur->file = NULL;
- }
- #endif
- if (pCur->chunksBuffer) {
- sqlite3_free(pCur->chunksBuffer);
- pCur->chunksBuffer = NULL;
- }
- if (pCur->vector) {
- pCur->vector = NULL;
- }
- sqlite3_free(pCur);
- return SQLITE_OK;
- }
- static int vec_npy_eachBestIndex(sqlite3_vtab *pVTab,
- sqlite3_index_info *pIdxInfo) {
- int hasInput;
- for (int i = 0; i < pIdxInfo->nConstraint; i++) {
- const struct sqlite3_index_constraint *pCons = &pIdxInfo->aConstraint[i];
- // printf("i=%d iColumn=%d, op=%d, usable=%d\n", i, pCons->iColumn,
- // pCons->op, pCons->usable);
- switch (pCons->iColumn) {
- case VEC_NPY_EACH_COLUMN_INPUT: {
- if (pCons->op == SQLITE_INDEX_CONSTRAINT_EQ && pCons->usable) {
- hasInput = 1;
- pIdxInfo->aConstraintUsage[i].argvIndex = 1;
- pIdxInfo->aConstraintUsage[i].omit = 1;
- }
- break;
- }
- }
- }
- if (!hasInput) {
- pVTab->zErrMsg = sqlite3_mprintf("input argument is required");
- return SQLITE_ERROR;
- }
- pIdxInfo->estimatedCost = (double)100000;
- pIdxInfo->estimatedRows = 100000;
- return SQLITE_OK;
- }
- static int vec_npy_eachFilter(sqlite3_vtab_cursor *pVtabCursor, int idxNum,
- const char *idxStr, int argc,
- sqlite3_value **argv) {
- UNUSED_PARAMETER(idxNum);
- UNUSED_PARAMETER(idxStr);
- assert(argc == 1);
- int rc;
- vec_npy_each_cursor *pCur = (vec_npy_each_cursor *)pVtabCursor;
- #ifndef SQLITE_VEC_OMIT_FS
- if (pCur->file) {
- fclose(pCur->file);
- pCur->file = NULL;
- }
- #endif
- if (pCur->chunksBuffer) {
- sqlite3_free(pCur->chunksBuffer);
- pCur->chunksBuffer = NULL;
- }
- if (pCur->vector) {
- pCur->vector = NULL;
- }
- #ifndef SQLITE_VEC_OMIT_FS
- struct VecNpyFile *f = NULL;
- if ((f = sqlite3_value_pointer(argv[0], SQLITE_VEC_NPY_FILE_NAME))) {
- FILE *file = fopen(f->path, "r");
- if (!file) {
- vtab_set_error(pVtabCursor->pVtab, "Could not open numpy file");
- return SQLITE_ERROR;
- }
- rc = parse_npy_file(pVtabCursor->pVtab, file, pCur);
- if (rc != SQLITE_OK) {
- #ifndef SQLITE_VEC_OMIT_FS
- fclose(file);
- #endif
- return rc;
- }
- } else
- #endif
- {
- const unsigned char *input = sqlite3_value_blob(argv[0]);
- int inputLength = sqlite3_value_bytes(argv[0]);
- void *data;
- size_t numElements;
- size_t numDimensions;
- enum VectorElementType element_type;
- rc = parse_npy_buffer(pVtabCursor->pVtab, input, inputLength, &data,
- &numElements, &numDimensions, &element_type);
- if (rc != SQLITE_OK) {
- return rc;
- }
- pCur->vector = data;
- pCur->elementType = element_type;
- pCur->nElements = numElements;
- pCur->nDimensions = numDimensions;
- pCur->input_type = VEC_NPY_EACH_INPUT_BUFFER;
- }
- pCur->iRowid = 0;
- return SQLITE_OK;
- }
- static int vec_npy_eachRowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid) {
- vec_npy_each_cursor *pCur = (vec_npy_each_cursor *)cur;
- *pRowid = pCur->iRowid;
- return SQLITE_OK;
- }
- static int vec_npy_eachEof(sqlite3_vtab_cursor *cur) {
- vec_npy_each_cursor *pCur = (vec_npy_each_cursor *)cur;
- if (pCur->input_type == VEC_NPY_EACH_INPUT_BUFFER) {
- return (!pCur->nElements) || (size_t)pCur->iRowid >= pCur->nElements;
- }
- return pCur->eof;
- }
- static int vec_npy_eachNext(sqlite3_vtab_cursor *cur) {
- vec_npy_each_cursor *pCur = (vec_npy_each_cursor *)cur;
- pCur->iRowid++;
- if (pCur->input_type == VEC_NPY_EACH_INPUT_BUFFER) {
- return SQLITE_OK;
- }
- #ifndef SQLITE_VEC_OMIT_FS
- // else: input is a file
- pCur->currentChunkIndex++;
- if (pCur->currentChunkIndex >= pCur->currentChunkSize) {
- pCur->currentChunkSize =
- fread(pCur->chunksBuffer,
- vector_byte_size(pCur->elementType, pCur->nDimensions),
- pCur->maxChunks, pCur->file);
- if (!pCur->currentChunkSize) {
- pCur->eof = 1;
- }
- pCur->currentChunkIndex = 0;
- }
- #endif
- return SQLITE_OK;
- }
- static int vec_npy_eachColumnBuffer(vec_npy_each_cursor *pCur,
- sqlite3_context *context, int i) {
- switch (i) {
- case VEC_NPY_EACH_COLUMN_VECTOR: {
- sqlite3_result_subtype(context, pCur->elementType);
- switch (pCur->elementType) {
- case SQLITE_VEC_ELEMENT_TYPE_FLOAT32: {
- sqlite3_result_blob(
- context,
- &((unsigned char *)
- pCur->vector)[pCur->iRowid * pCur->nDimensions * sizeof(f32)],
- pCur->nDimensions * sizeof(f32), SQLITE_TRANSIENT);
- break;
- }
- case SQLITE_VEC_ELEMENT_TYPE_INT8:
- case SQLITE_VEC_ELEMENT_TYPE_BIT: {
- // https://github.com/asg017/sqlite-vec/issues/42
- sqlite3_result_error(context,
- "vec_npy_each only supports float32 vectors", -1);
- break;
- }
- }
- break;
- }
- }
- return SQLITE_OK;
- }
- static int vec_npy_eachColumnFile(vec_npy_each_cursor *pCur,
- sqlite3_context *context, int i) {
- switch (i) {
- case VEC_NPY_EACH_COLUMN_VECTOR: {
- switch (pCur->elementType) {
- case SQLITE_VEC_ELEMENT_TYPE_FLOAT32: {
- sqlite3_result_blob(
- context,
- &((unsigned char *)
- pCur->chunksBuffer)[pCur->currentChunkIndex *
- pCur->nDimensions * sizeof(f32)],
- pCur->nDimensions * sizeof(f32), SQLITE_TRANSIENT);
- break;
- }
- case SQLITE_VEC_ELEMENT_TYPE_INT8:
- case SQLITE_VEC_ELEMENT_TYPE_BIT: {
- // https://github.com/asg017/sqlite-vec/issues/42
- sqlite3_result_error(context,
- "vec_npy_each only supports float32 vectors", -1);
- break;
- }
- }
- break;
- }
- }
- return SQLITE_OK;
- }
- static int vec_npy_eachColumn(sqlite3_vtab_cursor *cur,
- sqlite3_context *context, int i) {
- vec_npy_each_cursor *pCur = (vec_npy_each_cursor *)cur;
- switch (pCur->input_type) {
- case VEC_NPY_EACH_INPUT_BUFFER:
- return vec_npy_eachColumnBuffer(pCur, context, i);
- case VEC_NPY_EACH_INPUT_FILE:
- return vec_npy_eachColumnFile(pCur, context, i);
- }
- return SQLITE_ERROR;
- }
- static sqlite3_module vec_npy_eachModule = {
- /* iVersion */ 0,
- /* xCreate */ 0,
- /* xConnect */ vec_npy_eachConnect,
- /* xBestIndex */ vec_npy_eachBestIndex,
- /* xDisconnect */ vec_npy_eachDisconnect,
- /* xDestroy */ 0,
- /* xOpen */ vec_npy_eachOpen,
- /* xClose */ vec_npy_eachClose,
- /* xFilter */ vec_npy_eachFilter,
- /* xNext */ vec_npy_eachNext,
- /* xEof */ vec_npy_eachEof,
- /* xColumn */ vec_npy_eachColumn,
- /* xRowid */ vec_npy_eachRowid,
- /* xUpdate */ 0,
- /* xBegin */ 0,
- /* xSync */ 0,
- /* xCommit */ 0,
- /* xRollback */ 0,
- /* xFindMethod */ 0,
- /* xRename */ 0,
- /* xSavepoint */ 0,
- /* xRelease */ 0,
- /* xRollbackTo */ 0,
- /* xShadowName */ 0,
- #if SQLITE_VERSION_NUMBER >= 3044000
- /* xIntegrity */ 0,
- #endif
- };
#pragma endregion

#pragma region vec0 virtual table

// Column layout of a vec0 table: the id column is column 0 and the user
// defined columns start at VEC0_COLUMN_USERN_START. The hidden "distance" and
// "k" columns follow the last user column, at the offsets below.
#define VEC0_COLUMN_ID 0
#define VEC0_COLUMN_USERN_START 1
#define VEC0_COLUMN_OFFSET_DISTANCE 1
#define VEC0_COLUMN_OFFSET_K 2

// All *_NAME macros below are sqlite3_mprintf() style formats.
/// 1) schema, 2) original vtab table name
#define VEC0_SHADOW_INFO_NAME "\"%w\".\"%w_info\""

/// 1) schema, 2) original vtab table name
#define VEC0_SHADOW_CHUNKS_NAME "\"%w\".\"%w_chunks\""

/// 1) schema, 2) original vtab table name
#define VEC0_SHADOW_CHUNKS_CREATE                                              \
  "CREATE TABLE " VEC0_SHADOW_CHUNKS_NAME                                      \
  "(chunk_id INTEGER PRIMARY KEY AUTOINCREMENT,"                               \
  "size INTEGER NOT NULL,"                                                     \
  "validity BLOB NOT NULL,"                                                    \
  "rowids BLOB NOT NULL);"

/// 1) schema, 2) original vtab table name
#define VEC0_SHADOW_ROWIDS_NAME "\"%w\".\"%w_rowids\""

/// 1) schema, 2) original vtab table name
#define VEC0_SHADOW_ROWIDS_CREATE_BASIC                                        \
  "CREATE TABLE " VEC0_SHADOW_ROWIDS_NAME                                      \
  "(rowid INTEGER PRIMARY KEY AUTOINCREMENT,"                                  \
  "id,"                                                                        \
  "chunk_id INTEGER,"                                                          \
  "chunk_offset INTEGER);"

// vec0 tables with a text primary key are still backed by int64 primary keys,
// since a fixed-length rowid is required for vec0 chunks. An additional
// 'id text unique' column emulates the text primary key interface.
/// 1) schema, 2) original vtab table name
#define VEC0_SHADOW_ROWIDS_CREATE_PK_TEXT                                      \
  "CREATE TABLE " VEC0_SHADOW_ROWIDS_NAME                                      \
  "(rowid INTEGER PRIMARY KEY AUTOINCREMENT,"                                  \
  "id TEXT UNIQUE NOT NULL,"                                                   \
  "chunk_id INTEGER,"                                                          \
  "chunk_offset INTEGER);"

/// 1) schema, 2) original vtab table name, 3) two-digit column number
#define VEC0_SHADOW_VECTOR_N_NAME "\"%w\".\"%w_vector_chunks%02d\""

/// 1) schema, 2) original vtab table name, 3) two-digit column number
#define VEC0_SHADOW_VECTOR_N_CREATE                                            \
  "CREATE TABLE " VEC0_SHADOW_VECTOR_N_NAME                                    \
  "(rowid PRIMARY KEY,"                                                        \
  "vectors BLOB NOT NULL);"

/// 1) schema, 2) original vtab table name
#define VEC0_SHADOW_AUXILIARY_NAME "\"%w\".\"%w_auxiliary\""

/// 1) schema, 2) original vtab table name, 3) two-digit column number
#define VEC0_SHADOW_METADATA_N_NAME "\"%w\".\"%w_metadatachunks%02d\""

/// 1) schema, 2) original vtab table name, 3) two-digit column number
#define VEC0_SHADOW_METADATA_TEXT_DATA_NAME "\"%w\".\"%w_metadatatext%02d\""

// Prefix for error messages that indicate a bug in sqlite-vec itself, and the
// URL where such bugs should be reported.
#define VEC_INTERAL_ERROR "Internal sqlite-vec error: "
#define REPORT_URL "https://github.com/asg017/sqlite-vec/issues/new"

typedef struct vec0_vtab vec0_vtab;

// Upper bounds on the number of columns of each kind in one vec0 table.
#define VEC0_MAX_VECTOR_COLUMNS 16
#define VEC0_MAX_PARTITION_COLUMNS 4
#define VEC0_MAX_AUXILIARY_COLUMNS 16
#define VEC0_MAX_METADATA_COLUMNS 16

#define SQLITE_VEC_VEC0_MAX_DIMENSIONS 8192

// Size in bytes of one text metadata "view" slot, and the number of text
// bytes that can be stored inline in that slot.
#define VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH 16
#define VEC0_METADATA_TEXT_VIEW_DATA_LENGTH 12
/**
 * The kind of a user-defined vec0 column. One value is stored per user column
 * in vec0_vtab.user_column_kinds.
 */
typedef enum {
  /* Vector column, ie "contents_embedding float[1024]". */
  SQLITE_VEC0_USER_COLUMN_KIND_VECTOR = 1,

  /* Partition key column, ie "user_id integer partition key". */
  SQLITE_VEC0_USER_COLUMN_KIND_PARTITION = 2,

  /* Auxiliary column; its values live in the _auxiliary shadow table. */
  SQLITE_VEC0_USER_COLUMN_KIND_AUXILIARY = 3,

  /* Metadata column that can be filtered, ie "genre text". */
  SQLITE_VEC0_USER_COLUMN_KIND_METADATA = 4,
} vec0_user_column_kind;
- struct vec0_vtab {
- sqlite3_vtab base;
- // the SQLite connection of the host database
- sqlite3 *db;
- // True if the primary key of the vec0 table has a column type TEXT.
- // Will change the schema of the _rowids table, and insert/query logic.
- int pkIsText;
- // number of defined vector columns.
- int numVectorColumns;
- // number of defined PARTITION KEY columns.
- int numPartitionColumns;
- // number of defined auxiliary columns
- int numAuxiliaryColumns;
- // number of defined metadata columns
- int numMetadataColumns;
- // Name of the schema the table exists on.
- // Must be freed with sqlite3_free()
- char *schemaName;
- // Name of the table the table exists on.
- // Must be freed with sqlite3_free()
- char *tableName;
- // Name of the _rowids shadow table.
- // Must be freed with sqlite3_free()
- char *shadowRowidsName;
- // Name of the _chunks shadow table.
- // Must be freed with sqlite3_free()
- char *shadowChunksName;
- // contains enum vec0_user_column_kind values for up to
- // numVectorColumns + numPartitionColumns entries
- vec0_user_column_kind user_column_kinds[VEC0_MAX_VECTOR_COLUMNS + VEC0_MAX_PARTITION_COLUMNS + VEC0_MAX_AUXILIARY_COLUMNS + VEC0_MAX_METADATA_COLUMNS];
- uint8_t user_column_idxs[VEC0_MAX_VECTOR_COLUMNS + VEC0_MAX_PARTITION_COLUMNS + VEC0_MAX_AUXILIARY_COLUMNS + VEC0_MAX_METADATA_COLUMNS];
- // Name of all the vector chunk shadow tables.
- // Ex '_vector_chunks00'
- // Only the first numVectorColumns entries will be available.
- // The first numVectorColumns entries must be freed with sqlite3_free()
- char *shadowVectorChunksNames[VEC0_MAX_VECTOR_COLUMNS];
- // Name of all metadata chunk shadow tables, ie `_metadatachunks00`
- // Only the first numMetadataColumns entries will be available.
- // The first numMetadataColumns entries must be freed with sqlite3_free()
- char *shadowMetadataChunksNames[VEC0_MAX_METADATA_COLUMNS];
- struct VectorColumnDefinition vector_columns[VEC0_MAX_VECTOR_COLUMNS];
- struct Vec0PartitionColumnDefinition paritition_columns[VEC0_MAX_PARTITION_COLUMNS];
- struct Vec0AuxiliaryColumnDefinition auxiliary_columns[VEC0_MAX_AUXILIARY_COLUMNS];
- struct Vec0MetadataColumnDefinition metadata_columns[VEC0_MAX_METADATA_COLUMNS];
- int chunk_size;
- // select latest chunk from _chunks, getting chunk_id
- sqlite3_stmt *stmtLatestChunk;
- /**
- * Statement to insert a row into the _rowids table, with a rowid.
- * Parameters:
- * 1: int64, rowid to insert
- * Result columns: none
- * SQL: "INSERT INTO _rowids(rowid) VALUES (?)"
- *
- * Must be cleaned up with sqlite3_finalize().
- */
- sqlite3_stmt *stmtRowidsInsertRowid;
- /**
- * Statement to insert a row into the _rowids table, with an id.
- * The id column isn't a tradition primary key, but instead a unique
- * column to handle "text primary key" vec0 tables. The true int64 rowid
- * can be retrieved after inserting with sqlite3_last_rowid().
- *
- * Parameters:
- * 1: text or null, id to insert
- * Result columns: none
- *
- * Must be cleaned up with sqlite3_finalize().
- */
- sqlite3_stmt *stmtRowidsInsertId;
- /**
- * Statement to update the "position" columns chunk_id and chunk_offset for
- * a given _rowids row. Used when the "next available" chunk position is found
- * for a vector.
- *
- * Parameters:
- * 1: int64, chunk_id value
- * 2: int64, chunk_offset value
- * 3: int64, rowid value
- * Result columns: none
- *
- * Must be cleaned up with sqlite3_finalize().
- */
- sqlite3_stmt *stmtRowidsUpdatePosition;
- /**
- * Statement to quickly find the chunk_id + chunk_offset of a given row.
- * Parameters:
- * 1: rowid of the row/vector to lookup
- * Result columns:
- * 0: chunk_id (i64)
- * 1: chunk_offset (i64)
- * SQL: "SELECT id, chunk_id, chunk_offset FROM _rowids WHERE rowid = ?""
- *
- * Must be cleaned up with sqlite3_finalize().
- */
- sqlite3_stmt *stmtRowidsGetChunkPosition;
- };
- /**
- * @brief Finalize all the sqlite3_stmt members in a vec0_vtab.
- *
- * @param p vec0_vtab pointer
- */
- void vec0_free_resources(vec0_vtab *p) {
- sqlite3_finalize(p->stmtLatestChunk);
- p->stmtLatestChunk = NULL;
- sqlite3_finalize(p->stmtRowidsInsertRowid);
- p->stmtRowidsInsertRowid = NULL;
- sqlite3_finalize(p->stmtRowidsInsertId);
- p->stmtRowidsInsertId = NULL;
- sqlite3_finalize(p->stmtRowidsUpdatePosition);
- p->stmtRowidsUpdatePosition = NULL;
- sqlite3_finalize(p->stmtRowidsGetChunkPosition);
- p->stmtRowidsGetChunkPosition = NULL;
- }
- /**
- * @brief Free all memory and sqlite3_stmt members of a vec0_vtab
- *
- * @param p vec0_vtab pointer
- */
- void vec0_free(vec0_vtab *p) {
- vec0_free_resources(p);
- sqlite3_free(p->schemaName);
- p->schemaName = NULL;
- sqlite3_free(p->tableName);
- p->tableName = NULL;
- sqlite3_free(p->shadowChunksName);
- p->shadowChunksName = NULL;
- sqlite3_free(p->shadowRowidsName);
- p->shadowRowidsName = NULL;
- for (int i = 0; i < p->numVectorColumns; i++) {
- sqlite3_free(p->shadowVectorChunksNames[i]);
- p->shadowVectorChunksNames[i] = NULL;
- sqlite3_free(p->vector_columns[i].name);
- p->vector_columns[i].name = NULL;
- }
- }
- int vec0_num_defined_user_columns(vec0_vtab *p) {
- return p->numVectorColumns + p->numPartitionColumns + p->numAuxiliaryColumns + p->numMetadataColumns;
- }
- /**
- * @brief Returns the index of the distance hidden column for the given vec0
- * table.
- *
- * @param p vec0 table
- * @return int
- */
- int vec0_column_distance_idx(vec0_vtab *p) {
- return VEC0_COLUMN_USERN_START + (vec0_num_defined_user_columns(p) - 1) +
- VEC0_COLUMN_OFFSET_DISTANCE;
- }
- /**
- * @brief Returns the index of the k hidden column for the given vec0 table.
- *
- * @param p vec0 table
- * @return int k column index
- */
- int vec0_column_k_idx(vec0_vtab *p) {
- return VEC0_COLUMN_USERN_START + (vec0_num_defined_user_columns(p) - 1) +
- VEC0_COLUMN_OFFSET_K;
- }
- /**
- * Returns 1 if the given column-based index is a valid vector column,
- * 0 otherwise.
- */
- int vec0_column_idx_is_vector(vec0_vtab *pVtab, int column_idx) {
- return column_idx >= VEC0_COLUMN_USERN_START &&
- column_idx <= (VEC0_COLUMN_USERN_START + vec0_num_defined_user_columns(pVtab) - 1) &&
- pVtab->user_column_kinds[column_idx - VEC0_COLUMN_USERN_START] == SQLITE_VEC0_USER_COLUMN_KIND_VECTOR;
- }
- /**
- * Returns the vector index of the given user column index.
- * ONLY call if validated with vec0_column_idx_is_vector before
- */
- int vec0_column_idx_to_vector_idx(vec0_vtab *pVtab, int column_idx) {
- UNUSED_PARAMETER(pVtab);
- return pVtab->user_column_idxs[column_idx - VEC0_COLUMN_USERN_START];
- }
- /**
- * Returns 1 if the given column-based index is a "partition key" column,
- * 0 otherwise.
- */
- int vec0_column_idx_is_partition(vec0_vtab *pVtab, int column_idx) {
- return column_idx >= VEC0_COLUMN_USERN_START &&
- column_idx <= (VEC0_COLUMN_USERN_START + vec0_num_defined_user_columns(pVtab) - 1) &&
- pVtab->user_column_kinds[column_idx - VEC0_COLUMN_USERN_START] == SQLITE_VEC0_USER_COLUMN_KIND_PARTITION;
- }
- /**
- * Returns the partition column index of the given user column index.
- * ONLY call if validated with vec0_column_idx_is_vector before
- */
- int vec0_column_idx_to_partition_idx(vec0_vtab *pVtab, int column_idx) {
- UNUSED_PARAMETER(pVtab);
- return pVtab->user_column_idxs[column_idx - VEC0_COLUMN_USERN_START];
- }
- /**
- * Returns 1 if the given column-based index is a auxiliary column,
- * 0 otherwise.
- */
- int vec0_column_idx_is_auxiliary(vec0_vtab *pVtab, int column_idx) {
- return column_idx >= VEC0_COLUMN_USERN_START &&
- column_idx <= (VEC0_COLUMN_USERN_START + vec0_num_defined_user_columns(pVtab) - 1) &&
- pVtab->user_column_kinds[column_idx - VEC0_COLUMN_USERN_START] == SQLITE_VEC0_USER_COLUMN_KIND_AUXILIARY;
- }
- /**
- * Returns the auxiliary column index of the given user column index.
- * ONLY call if validated with vec0_column_idx_to_partition_idx before
- */
- int vec0_column_idx_to_auxiliary_idx(vec0_vtab *pVtab, int column_idx) {
- UNUSED_PARAMETER(pVtab);
- return pVtab->user_column_idxs[column_idx - VEC0_COLUMN_USERN_START];
- }
- /**
- * Returns 1 if the given column-based index is a metadata column,
- * 0 otherwise.
- */
- int vec0_column_idx_is_metadata(vec0_vtab *pVtab, int column_idx) {
- return column_idx >= VEC0_COLUMN_USERN_START &&
- column_idx <= (VEC0_COLUMN_USERN_START + vec0_num_defined_user_columns(pVtab) - 1) &&
- pVtab->user_column_kinds[column_idx - VEC0_COLUMN_USERN_START] == SQLITE_VEC0_USER_COLUMN_KIND_METADATA;
- }
- /**
- * Returns the metadata column index of the given user column index.
- * ONLY call if validated with vec0_column_idx_is_metadata before
- */
- int vec0_column_idx_to_metadata_idx(vec0_vtab *pVtab, int column_idx) {
- UNUSED_PARAMETER(pVtab);
- return pVtab->user_column_idxs[column_idx - VEC0_COLUMN_USERN_START];
- }
- /**
- * @brief Retrieve the chunk_id, chunk_offset, and possible "id" value
- * of a vec0_vtab row with the provided rowid
- *
- * @param p vec0_vtab
- * @param rowid the rowid of the row to query
- * @param id output, optional sqlite3_value to provide the id.
- * Useful for text PK rows. Must be freed with sqlite3_value_free()
- * @param chunk_id output, the chunk_id the row belongs to
- * @param chunk_offset output, the offset within the chunk the row belongs to
- * @return SQLITE_ROW on success, error code otherwise. SQLITE_EMPTY if row DNE
- */
- int vec0_get_chunk_position(vec0_vtab *p, i64 rowid, sqlite3_value **id,
- i64 *chunk_id, i64 *chunk_offset) {
- int rc;
- if (!p->stmtRowidsGetChunkPosition) {
- const char *zSql =
- sqlite3_mprintf("SELECT id, chunk_id, chunk_offset "
- "FROM " VEC0_SHADOW_ROWIDS_NAME " WHERE rowid = ?",
- p->schemaName, p->tableName);
- if (!zSql) {
- rc = SQLITE_NOMEM;
- goto cleanup;
- }
- rc = sqlite3_prepare_v2(p->db, zSql, -1, &p->stmtRowidsGetChunkPosition, 0);
- sqlite3_free((void *)zSql);
- if (rc != SQLITE_OK) {
- vtab_set_error(
- &p->base, VEC_INTERAL_ERROR
- "could not initialize 'rowids get chunk position' statement");
- goto cleanup;
- }
- }
- sqlite3_bind_int64(p->stmtRowidsGetChunkPosition, 1, rowid);
- rc = sqlite3_step(p->stmtRowidsGetChunkPosition);
- // special case: when no results, return SQLITE_EMPTY to convey "that chunk
- // position doesnt exist"
- if (rc == SQLITE_DONE) {
- rc = SQLITE_EMPTY;
- goto cleanup;
- }
- if (rc != SQLITE_ROW) {
- goto cleanup;
- }
- if (id) {
- sqlite3_value *value =
- sqlite3_column_value(p->stmtRowidsGetChunkPosition, 0);
- *id = sqlite3_value_dup(value);
- if (!*id) {
- rc = SQLITE_NOMEM;
- goto cleanup;
- }
- }
- if (chunk_id) {
- *chunk_id = sqlite3_column_int64(p->stmtRowidsGetChunkPosition, 1);
- }
- if (chunk_offset) {
- *chunk_offset = sqlite3_column_int64(p->stmtRowidsGetChunkPosition, 2);
- }
- rc = SQLITE_OK;
- cleanup:
- sqlite3_reset(p->stmtRowidsGetChunkPosition);
- sqlite3_clear_bindings(p->stmtRowidsGetChunkPosition);
- return rc;
- }
- /**
- * @brief Return the id value from the _rowids table where _rowids.rowid =
- * rowid.
- *
- * @param pVtab: vec0 table to query
- * @param rowid: rowid of the row to query.
- * @param out: A dup'ed sqlite3_value of the id column. Might be null.
- * Must be cleaned up with sqlite3_value_free().
- * @returns SQLITE_OK on success, error code on failure
- */
- int vec0_get_id_value_from_rowid(vec0_vtab *pVtab, i64 rowid,
- sqlite3_value **out) {
- // PERF: different strategy than get_chunk_position?
- return vec0_get_chunk_position((vec0_vtab *)pVtab, rowid, out, NULL, NULL);
- }
- int vec0_rowid_from_id(vec0_vtab *p, sqlite3_value *valueId, i64 *rowid) {
- sqlite3_stmt *stmt = NULL;
- int rc;
- char *zSql;
- zSql = sqlite3_mprintf("SELECT rowid"
- " FROM " VEC0_SHADOW_ROWIDS_NAME " WHERE id = ?",
- p->schemaName, p->tableName);
- if (!zSql) {
- rc = SQLITE_NOMEM;
- goto cleanup;
- }
- rc = sqlite3_prepare_v2(p->db, zSql, -1, &stmt, NULL);
- sqlite3_free(zSql);
- if (rc != SQLITE_OK) {
- goto cleanup;
- }
- sqlite3_bind_value(stmt, 1, valueId);
- rc = sqlite3_step(stmt);
- if (rc == SQLITE_DONE) {
- rc = SQLITE_EMPTY;
- goto cleanup;
- }
- if (rc != SQLITE_ROW) {
- goto cleanup;
- }
- *rowid = sqlite3_column_int64(stmt, 0);
- rc = sqlite3_step(stmt);
- if (rc != SQLITE_DONE) {
- goto cleanup;
- }
- rc = SQLITE_OK;
- cleanup:
- sqlite3_finalize(stmt);
- return rc;
- }
- int vec0_result_id(vec0_vtab *p, sqlite3_context *context, i64 rowid) {
- if (!p->pkIsText) {
- sqlite3_result_int64(context, rowid);
- return SQLITE_OK;
- }
- sqlite3_value *valueId;
- int rc = vec0_get_id_value_from_rowid(p, rowid, &valueId);
- if (rc != SQLITE_OK) {
- return rc;
- }
- if (!valueId) {
- sqlite3_result_error_nomem(context);
- } else {
- sqlite3_result_value(context, valueId);
- sqlite3_value_free(valueId);
- }
- return SQLITE_OK;
- }
- /**
- * @brief
- *
- * @param pVtab: virtual table to query
- * @param rowid: row to lookup
- * @param vector_column_idx: which vector column to query
- * @param outVector: Output pointer to the vector buffer.
- * Must be sqlite3_free()'ed.
- * @param outVectorSize: Pointer to a int where the size of outVector
- * will be stored.
- * @return int SQLITE_OK on success.
- */
- int vec0_get_vector_data(vec0_vtab *pVtab, i64 rowid, int vector_column_idx,
- void **outVector, int *outVectorSize) {
- vec0_vtab *p = pVtab;
- int rc, brc;
- i64 chunk_id;
- i64 chunk_offset;
- size_t size;
- void *buf = NULL;
- int blobOffset;
- sqlite3_blob *vectorBlob = NULL;
- assert((vector_column_idx >= 0) &&
- (vector_column_idx < pVtab->numVectorColumns));
- rc = vec0_get_chunk_position(pVtab, rowid, NULL, &chunk_id, &chunk_offset);
- if (rc == SQLITE_EMPTY) {
- vtab_set_error(&pVtab->base, "Could not find a row with rowid %lld", rowid);
- goto cleanup;
- }
- if (rc != SQLITE_OK) {
- goto cleanup;
- }
- rc = sqlite3_blob_open(p->db, p->schemaName,
- p->shadowVectorChunksNames[vector_column_idx],
- "vectors", chunk_id, 0, &vectorBlob);
- if (rc != SQLITE_OK) {
- vtab_set_error(&pVtab->base,
- "Could not fetch vector data for %lld, opening blob failed",
- rowid);
- rc = SQLITE_ERROR;
- goto cleanup;
- }
- size = vector_column_byte_size(pVtab->vector_columns[vector_column_idx]);
- blobOffset = chunk_offset * size;
- buf = sqlite3_malloc(size);
- if (!buf) {
- rc = SQLITE_NOMEM;
- goto cleanup;
- }
- rc = sqlite3_blob_read(vectorBlob, buf, size, blobOffset);
- if (rc != SQLITE_OK) {
- sqlite3_free(buf);
- buf = NULL;
- vtab_set_error(
- &pVtab->base,
- "Could not fetch vector data for %lld, reading from blob failed",
- rowid);
- rc = SQLITE_ERROR;
- goto cleanup;
- }
- *outVector = buf;
- if (outVectorSize) {
- *outVectorSize = size;
- }
- rc = SQLITE_OK;
- cleanup:
- brc = sqlite3_blob_close(vectorBlob);
- if ((rc == SQLITE_OK) && (brc != SQLITE_OK)) {
- vtab_set_error(
- &p->base, VEC_INTERAL_ERROR
- "unknown error, could not close vector blob, please file an issue");
- return brc;
- }
- return rc;
- }
- /**
- * @brief Retrieve the sqlite3_value of the i'th partition value for the given row.
- *
- * @param pVtab - the vec0_vtab in questions
- * @param rowid - rowid of target row
- * @param partition_idx - which partition column to retrieve
- * @param outValue - output sqlite3_value
- * @return int - SQLITE_OK on success, otherwise error code
- */
- int vec0_get_partition_value_for_rowid(vec0_vtab *pVtab, i64 rowid, int partition_idx, sqlite3_value ** outValue) {
- int rc;
- i64 chunk_id;
- i64 chunk_offset;
- rc = vec0_get_chunk_position(pVtab, rowid, NULL, &chunk_id, &chunk_offset);
- if(rc != SQLITE_OK) {
- return rc;
- }
- sqlite3_stmt * stmt = NULL;
- char * zSql = sqlite3_mprintf("SELECT partition%02d FROM " VEC0_SHADOW_CHUNKS_NAME " WHERE chunk_id = ?", partition_idx, pVtab->schemaName, pVtab->tableName);
- if(!zSql) {
- return SQLITE_NOMEM;
- }
- rc = sqlite3_prepare_v2(pVtab->db, zSql, -1, &stmt, NULL);
- sqlite3_free(zSql);
- if(rc != SQLITE_OK) {
- return rc;
- }
- sqlite3_bind_int64(stmt, 1, chunk_id);
- rc = sqlite3_step(stmt);
- if(rc != SQLITE_ROW) {
- rc = SQLITE_ERROR;
- goto done;
- }
- *outValue = sqlite3_value_dup(sqlite3_column_value(stmt, 0));
- if(!*outValue) {
- rc = SQLITE_NOMEM;
- goto done;
- }
- rc = SQLITE_OK;
- done:
- sqlite3_finalize(stmt);
- return rc;
- }
- /**
- * @brief Get the value of an auxiliary column for the given rowid
- *
- * @param pVtab vec0_vtab
- * @param rowid the rowid of the row to lookup
- * @param auxiliary_idx aux index of the column we care about
- * @param outValue Output sqlite3_value to store
- * @return int SQLITE_OK on success, error code otherwise
- */
- int vec0_get_auxiliary_value_for_rowid(vec0_vtab *pVtab, i64 rowid, int auxiliary_idx, sqlite3_value ** outValue) {
- int rc;
- sqlite3_stmt * stmt = NULL;
- char * zSql = sqlite3_mprintf("SELECT value%02d FROM " VEC0_SHADOW_AUXILIARY_NAME " WHERE rowid = ?", auxiliary_idx, pVtab->schemaName, pVtab->tableName);
- if(!zSql) {
- return SQLITE_NOMEM;
- }
- rc = sqlite3_prepare_v2(pVtab->db, zSql, -1, &stmt, NULL);
- sqlite3_free(zSql);
- if(rc != SQLITE_OK) {
- return rc;
- }
- sqlite3_bind_int64(stmt, 1, rowid);
- rc = sqlite3_step(stmt);
- if(rc != SQLITE_ROW) {
- rc = SQLITE_ERROR;
- goto done;
- }
- *outValue = sqlite3_value_dup(sqlite3_column_value(stmt, 0));
- if(!*outValue) {
- rc = SQLITE_NOMEM;
- goto done;
- }
- rc = SQLITE_OK;
- done:
- sqlite3_finalize(stmt);
- return rc;
- }
- /**
- * @brief Result the given metadata value for the given row and metadata column index.
- * Will traverse the metadatachunksNN table with BLOB I/0 for the given rowid.
- *
- * @param p
- * @param rowid
- * @param metadata_idx
- * @param context
- * @return int
- */
- int vec0_result_metadata_value_for_rowid(vec0_vtab *p, i64 rowid, int metadata_idx, sqlite3_context * context) {
- int rc;
- i64 chunk_id;
- i64 chunk_offset;
- rc = vec0_get_chunk_position(p, rowid, NULL, &chunk_id, &chunk_offset);
- if(rc != SQLITE_OK) {
- return rc;
- }
- sqlite3_blob * blobValue;
- rc = sqlite3_blob_open(p->db, p->schemaName, p->shadowMetadataChunksNames[metadata_idx], "data", chunk_id, 0, &blobValue);
- if(rc != SQLITE_OK) {
- return rc;
- }
- switch(p->metadata_columns[metadata_idx].kind) {
- case VEC0_METADATA_COLUMN_KIND_BOOLEAN: {
- u8 block;
- rc = sqlite3_blob_read(blobValue, &block, sizeof(block), chunk_offset / CHAR_BIT);
- if(rc != SQLITE_OK) {
- goto done;
- }
- int value = block >> ((chunk_offset % CHAR_BIT)) & 1;
- sqlite3_result_int(context, value);
- break;
- }
- case VEC0_METADATA_COLUMN_KIND_INTEGER: {
- i64 value;
- rc = sqlite3_blob_read(blobValue, &value, sizeof(value), chunk_offset * sizeof(i64));
- if(rc != SQLITE_OK) {
- goto done;
- }
- sqlite3_result_int64(context, value);
- break;
- }
- case VEC0_METADATA_COLUMN_KIND_FLOAT: {
- double value;
- rc = sqlite3_blob_read(blobValue, &value, sizeof(value), chunk_offset * sizeof(double));
- if(rc != SQLITE_OK) {
- goto done;
- }
- sqlite3_result_double(context, value);
- break;
- }
- case VEC0_METADATA_COLUMN_KIND_TEXT: {
- u8 view[VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH];
- rc = sqlite3_blob_read(blobValue, &view, VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH, chunk_offset * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH);
- if(rc != SQLITE_OK) {
- goto done;
- }
- int length = ((int *)view)[0];
- if(length <= VEC0_METADATA_TEXT_VIEW_DATA_LENGTH) {
- sqlite3_result_text(context, (const char*) (view + 4), length, SQLITE_TRANSIENT);
- }
- else {
- sqlite3_stmt * stmt;
- const char * zSql = sqlite3_mprintf("SELECT data FROM " VEC0_SHADOW_METADATA_TEXT_DATA_NAME " WHERE rowid = ?", p->schemaName, p->tableName, metadata_idx);
- if(!zSql) {
- rc = SQLITE_ERROR;
- goto done;
- }
- rc = sqlite3_prepare_v2(p->db, zSql, -1, &stmt, NULL);
- sqlite3_free((void *) zSql);
- if(rc != SQLITE_OK) {
- goto done;
- }
- sqlite3_bind_int64(stmt, 1, rowid);
- rc = sqlite3_step(stmt);
- if(rc != SQLITE_ROW) {
- sqlite3_finalize(stmt);
- rc = SQLITE_ERROR;
- goto done;
- }
- sqlite3_result_value(context, sqlite3_column_value(stmt, 0));
- sqlite3_finalize(stmt);
- rc = SQLITE_OK;
- }
- break;
- }
- }
- done:
- // blobValue is read-only, will not fail on close
- sqlite3_blob_close(blobValue);
- return rc;
- }
- int vec0_get_latest_chunk_rowid(vec0_vtab *p, i64 *chunk_rowid, sqlite3_value ** partitionKeyValues) {
- int rc;
- const char *zSql;
- // lazy initialize stmtLatestChunk when needed. May be cleared during xSync()
- if (!p->stmtLatestChunk) {
- if(p->numPartitionColumns > 0) {
- sqlite3_str * s = sqlite3_str_new(NULL);
- sqlite3_str_appendf(s, "SELECT max(rowid) FROM " VEC0_SHADOW_CHUNKS_NAME " WHERE ",
- p->schemaName, p->tableName);
- for(int i = 0; i < p->numPartitionColumns; i++) {
- if(i != 0) {
- sqlite3_str_appendall(s, " AND ");
- }
- sqlite3_str_appendf(s, " partition%02d = ? ", i);
- }
- zSql = sqlite3_str_finish(s);
- }else {
- zSql = sqlite3_mprintf("SELECT max(rowid) FROM " VEC0_SHADOW_CHUNKS_NAME,
- p->schemaName, p->tableName);
- }
- if (!zSql) {
- rc = SQLITE_NOMEM;
- goto cleanup;
- }
- rc = sqlite3_prepare_v2(p->db, zSql, -1, &p->stmtLatestChunk, 0);
- sqlite3_free((void *)zSql);
- if (rc != SQLITE_OK) {
- // IMP: V21406_05476
- vtab_set_error(&p->base, VEC_INTERAL_ERROR
- "could not initialize 'latest chunk' statement");
- goto cleanup;
- }
- }
- for(int i = 0; i < p->numPartitionColumns; i++) {
- sqlite3_bind_value(p->stmtLatestChunk, i+1, (partitionKeyValues[i]));
- }
- rc = sqlite3_step(p->stmtLatestChunk);
- if (rc != SQLITE_ROW) {
- // IMP: V31559_15629
- vtab_set_error(&p->base, VEC_INTERAL_ERROR "Could not find latest chunk");
- rc = SQLITE_ERROR;
- goto cleanup;
- }
- if(sqlite3_column_type(p->stmtLatestChunk, 0) == SQLITE_NULL){
- rc = SQLITE_EMPTY;
- goto cleanup;
- }
- *chunk_rowid = sqlite3_column_int64(p->stmtLatestChunk, 0);
- rc = sqlite3_step(p->stmtLatestChunk);
- if (rc != SQLITE_DONE) {
- vtab_set_error(&p->base,
- VEC_INTERAL_ERROR
- "unknown result code when closing out stmtLatestChunk. "
- "Please file an issue: " REPORT_URL,
- p->schemaName, p->shadowChunksName);
- goto cleanup;
- }
- rc = SQLITE_OK;
- cleanup:
- if (p->stmtLatestChunk) {
- sqlite3_reset(p->stmtLatestChunk);
- sqlite3_clear_bindings(p->stmtLatestChunk);
- }
- return rc;
- }
- int vec0_rowids_insert_rowid(vec0_vtab *p, i64 rowid) {
- int rc = SQLITE_OK;
- int entered = 0;
- UNUSED_PARAMETER(entered); // temporary
- if (!p->stmtRowidsInsertRowid) {
- const char *zSql =
- sqlite3_mprintf("INSERT INTO " VEC0_SHADOW_ROWIDS_NAME "(rowid)"
- "VALUES (?);",
- p->schemaName, p->tableName);
- if (!zSql) {
- rc = SQLITE_NOMEM;
- goto cleanup;
- }
- rc = sqlite3_prepare_v2(p->db, zSql, -1, &p->stmtRowidsInsertRowid, 0);
- sqlite3_free((void *)zSql);
- if (rc != SQLITE_OK) {
- vtab_set_error(&p->base, VEC_INTERAL_ERROR
- "could not initialize 'insert rowids' statement");
- goto cleanup;
- }
- }
- #if SQLITE_THREADSAFE
- if (sqlite3_mutex_enter) {
- sqlite3_mutex_enter(sqlite3_db_mutex(p->db));
- entered = 1;
- }
- #endif
- sqlite3_bind_int64(p->stmtRowidsInsertRowid, 1, rowid);
- rc = sqlite3_step(p->stmtRowidsInsertRowid);
- if (rc != SQLITE_DONE) {
- if (sqlite3_extended_errcode(p->db) == SQLITE_CONSTRAINT_PRIMARYKEY) {
- // IMP: V17090_01160
- vtab_set_error(&p->base, "UNIQUE constraint failed on %s primary key",
- p->tableName);
- } else {
- // IMP: V04679_21517
- vtab_set_error(&p->base,
- "Error inserting rowid into rowids shadow table: %s",
- sqlite3_errmsg(sqlite3_db_handle(p->stmtRowidsInsertId)));
- }
- rc = SQLITE_ERROR;
- goto cleanup;
- }
- rc = SQLITE_OK;
- cleanup:
- if (p->stmtRowidsInsertRowid) {
- sqlite3_reset(p->stmtRowidsInsertRowid);
- sqlite3_clear_bindings(p->stmtRowidsInsertRowid);
- }
- #if SQLITE_THREADSAFE
- if (sqlite3_mutex_leave && entered) {
- sqlite3_mutex_leave(sqlite3_db_mutex(p->db));
- }
- #endif
- return rc;
- }
- int vec0_rowids_insert_id(vec0_vtab *p, sqlite3_value *idValue, i64 *rowid) {
- int rc = SQLITE_OK;
- int entered = 0;
- UNUSED_PARAMETER(entered); // temporary
- if (!p->stmtRowidsInsertId) {
- const char *zSql =
- sqlite3_mprintf("INSERT INTO " VEC0_SHADOW_ROWIDS_NAME "(id)"
- "VALUES (?);",
- p->schemaName, p->tableName);
- if (!zSql) {
- rc = SQLITE_NOMEM;
- goto complete;
- }
- rc = sqlite3_prepare_v2(p->db, zSql, -1, &p->stmtRowidsInsertId, 0);
- sqlite3_free((void *)zSql);
- if (rc != SQLITE_OK) {
- vtab_set_error(&p->base, VEC_INTERAL_ERROR
- "could not initialize 'insert rowids id' statement");
- goto complete;
- }
- }
- #if SQLITE_THREADSAFE
- if (sqlite3_mutex_enter) {
- sqlite3_mutex_enter(sqlite3_db_mutex(p->db));
- entered = 1;
- }
- #endif
- if (idValue) {
- sqlite3_bind_value(p->stmtRowidsInsertId, 1, idValue);
- }
- rc = sqlite3_step(p->stmtRowidsInsertId);
- if (rc != SQLITE_DONE) {
- if (sqlite3_extended_errcode(p->db) == SQLITE_CONSTRAINT_UNIQUE) {
- // IMP: V20497_04568
- vtab_set_error(&p->base, "UNIQUE constraint failed on %s primary key",
- p->tableName);
- } else {
- // IMP: V24016_08086
- // IMP: V15177_32015
- vtab_set_error(&p->base,
- "Error inserting id into rowids shadow table: %s",
- sqlite3_errmsg(sqlite3_db_handle(p->stmtRowidsInsertId)));
- }
- rc = SQLITE_ERROR;
- goto complete;
- }
- *rowid = sqlite3_last_insert_rowid(p->db);
- rc = SQLITE_OK;
- complete:
- if (p->stmtRowidsInsertId) {
- sqlite3_reset(p->stmtRowidsInsertId);
- sqlite3_clear_bindings(p->stmtRowidsInsertId);
- }
- #if SQLITE_THREADSAFE
- if (sqlite3_mutex_leave && entered) {
- sqlite3_mutex_leave(sqlite3_db_mutex(p->db));
- }
- #endif
- return rc;
- }
- int vec0_metadata_chunk_size(vec0_metadata_column_kind kind, int chunk_size) {
- switch(kind) {
- case VEC0_METADATA_COLUMN_KIND_BOOLEAN:
- return chunk_size / 8;
- case VEC0_METADATA_COLUMN_KIND_INTEGER:
- return chunk_size * sizeof(i64);
- case VEC0_METADATA_COLUMN_KIND_FLOAT:
- return chunk_size * sizeof(double);
- case VEC0_METADATA_COLUMN_KIND_TEXT:
- return chunk_size * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH;
- }
- return 0;
- }
- int vec0_rowids_update_position(vec0_vtab *p, i64 rowid, i64 chunk_rowid,
- i64 chunk_offset) {
- int rc = SQLITE_OK;
- if (!p->stmtRowidsUpdatePosition) {
- const char *zSql = sqlite3_mprintf(" UPDATE " VEC0_SHADOW_ROWIDS_NAME
- " SET chunk_id = ?, chunk_offset = ?"
- " WHERE rowid = ?",
- p->schemaName, p->tableName);
- if (!zSql) {
- rc = SQLITE_NOMEM;
- goto cleanup;
- }
- rc = sqlite3_prepare_v2(p->db, zSql, -1, &p->stmtRowidsUpdatePosition, 0);
- sqlite3_free((void *)zSql);
- if (rc != SQLITE_OK) {
- vtab_set_error(&p->base, VEC_INTERAL_ERROR
- "could not initialize 'update rowids position' statement");
- goto cleanup;
- }
- }
- sqlite3_bind_int64(p->stmtRowidsUpdatePosition, 1, chunk_rowid);
- sqlite3_bind_int64(p->stmtRowidsUpdatePosition, 2, chunk_offset);
- sqlite3_bind_int64(p->stmtRowidsUpdatePosition, 3, rowid);
- rc = sqlite3_step(p->stmtRowidsUpdatePosition);
- if (rc != SQLITE_DONE) {
- // IMP: V21925_05995
- vtab_set_error(&p->base,
- VEC_INTERAL_ERROR
- "could not update rowids position for rowid=%lld, "
- "chunk_rowid=%lld, chunk_offset=%lld",
- rowid, chunk_rowid, chunk_offset);
- rc = SQLITE_ERROR;
- goto cleanup;
- }
- rc = SQLITE_OK;
- cleanup:
- if (p->stmtRowidsUpdatePosition) {
- sqlite3_reset(p->stmtRowidsUpdatePosition);
- sqlite3_clear_bindings(p->stmtRowidsUpdatePosition);
- }
- return rc;
- }
- /**
- * @brief Adds a new chunk for the vec0 table, and the corresponding vector
- * chunks.
- *
- * Inserts a new row into the _chunks table, with blank data, and uses that new
- * rowid to insert new blank rows into _vector_chunksXX tables.
- *
- * @param p: vec0 table to add new chunk
- * @param partitionKeyValues: Array of partition key values for the new chunk, if available
- * @param chunk_rowid: Output pointer, if not NULL, then will be filled with the
- * new chunk rowid.
- * @return int SQLITE_OK on success, error code otherwise.
- */
- int vec0_new_chunk(vec0_vtab *p, sqlite3_value ** partitionKeyValues, i64 *chunk_rowid) {
- int rc;
- char *zSql;
- sqlite3_stmt *stmt;
- i64 rowid;
- // Step 1: Insert a new row in _chunks, capture that new rowid
- if(p->numPartitionColumns > 0) {
- sqlite3_str * s = sqlite3_str_new(NULL);
- sqlite3_str_appendf(s, "INSERT INTO " VEC0_SHADOW_CHUNKS_NAME, p->schemaName, p->tableName);
- sqlite3_str_appendall(s, "(size, validity, rowids");
- for(int i = 0; i < p->numPartitionColumns; i++) {
- sqlite3_str_appendf(s, ", partition%02d", i);
- }
- sqlite3_str_appendall(s, ") VALUES (?, ?, ?");
- for(int i = 0; i < p->numPartitionColumns; i++) {
- sqlite3_str_appendall(s, ", ?");
- }
- sqlite3_str_appendall(s, ")");
- zSql = sqlite3_str_finish(s);
- }else {
- zSql = sqlite3_mprintf("INSERT INTO " VEC0_SHADOW_CHUNKS_NAME
- "(size, validity, rowids) "
- "VALUES (?, ?, ?);",
- p->schemaName, p->tableName);
- }
- if (!zSql) {
- return SQLITE_NOMEM;
- }
- rc = sqlite3_prepare_v2(p->db, zSql, -1, &stmt, NULL);
- sqlite3_free(zSql);
- if (rc != SQLITE_OK) {
- sqlite3_finalize(stmt);
- return rc;
- }
- #if SQLITE_THREADSAFE
- if (sqlite3_mutex_enter) {
- sqlite3_mutex_enter(sqlite3_db_mutex(p->db));
- }
- #endif
- sqlite3_bind_int64(stmt, 1, p->chunk_size); // size
- sqlite3_bind_zeroblob(stmt, 2, p->chunk_size / CHAR_BIT); // validity bitmap
- sqlite3_bind_zeroblob(stmt, 3, p->chunk_size * sizeof(i64)); // rowids
- for(int i = 0; i < p->numPartitionColumns; i++) {
- sqlite3_bind_value(stmt, 4 + i, partitionKeyValues[i]);
- }
- rc = sqlite3_step(stmt);
- int failed = rc != SQLITE_DONE;
- rowid = sqlite3_last_insert_rowid(p->db);
- #if SQLITE_THREADSAFE
- if (sqlite3_mutex_leave) {
- sqlite3_mutex_leave(sqlite3_db_mutex(p->db));
- }
- #endif
- sqlite3_finalize(stmt);
- if (failed) {
- return SQLITE_ERROR;
- }
- // Step 2: Create new vector chunks for each vector column, with
- // that new chunk_rowid.
- for (int i = 0; i < vec0_num_defined_user_columns(p); i++) {
- if(p->user_column_kinds[i] != SQLITE_VEC0_USER_COLUMN_KIND_VECTOR) {
- continue;
- }
- int vector_column_idx = p->user_column_idxs[i];
- i64 vectorsSize =
- p->chunk_size * vector_column_byte_size(p->vector_columns[vector_column_idx]);
- zSql = sqlite3_mprintf("INSERT INTO " VEC0_SHADOW_VECTOR_N_NAME
- "(rowid, vectors)"
- "VALUES (?, ?)",
- p->schemaName, p->tableName, vector_column_idx);
- if (!zSql) {
- return SQLITE_NOMEM;
- }
- rc = sqlite3_prepare_v2(p->db, zSql, -1, &stmt, NULL);
- sqlite3_free(zSql);
- if (rc != SQLITE_OK) {
- sqlite3_finalize(stmt);
- return rc;
- }
- sqlite3_bind_int64(stmt, 1, rowid);
- sqlite3_bind_zeroblob64(stmt, 2, vectorsSize);
- rc = sqlite3_step(stmt);
- sqlite3_finalize(stmt);
- if (rc != SQLITE_DONE) {
- return rc;
- }
- }
- // Step 3: Create new metadata chunks for each metadata column
- for (int i = 0; i < vec0_num_defined_user_columns(p); i++) {
- if(p->user_column_kinds[i] != SQLITE_VEC0_USER_COLUMN_KIND_METADATA) {
- continue;
- }
- int metadata_column_idx = p->user_column_idxs[i];
- zSql = sqlite3_mprintf("INSERT INTO " VEC0_SHADOW_METADATA_N_NAME
- "(rowid, data)"
- "VALUES (?, ?)",
- p->schemaName, p->tableName, metadata_column_idx);
- if (!zSql) {
- return SQLITE_NOMEM;
- }
- rc = sqlite3_prepare_v2(p->db, zSql, -1, &stmt, NULL);
- sqlite3_free(zSql);
- if (rc != SQLITE_OK) {
- sqlite3_finalize(stmt);
- return rc;
- }
- sqlite3_bind_int64(stmt, 1, rowid);
- sqlite3_bind_zeroblob64(stmt, 2, vec0_metadata_chunk_size(p->metadata_columns[metadata_column_idx].kind, p->chunk_size));
- rc = sqlite3_step(stmt);
- sqlite3_finalize(stmt);
- if (rc != SQLITE_DONE) {
- return rc;
- }
- }
- if (chunk_rowid) {
- *chunk_rowid = rowid;
- }
- return SQLITE_OK;
- }
- struct vec0_query_fullscan_data {
- sqlite3_stmt *rowids_stmt;
- i8 done;
- };
- void vec0_query_fullscan_data_clear(
- struct vec0_query_fullscan_data *fullscan_data) {
- if (!fullscan_data)
- return;
- if (fullscan_data->rowids_stmt) {
- sqlite3_finalize(fullscan_data->rowids_stmt);
- fullscan_data->rowids_stmt = NULL;
- }
- }
- struct vec0_query_knn_data {
- i64 k;
- i64 k_used;
- // Array of rowids of size k. Must be freed with sqlite3_free().
- i64 *rowids;
- // Array of distances of size k. Must be freed with sqlite3_free().
- f32 *distances;
- i64 current_idx;
- };
- void vec0_query_knn_data_clear(struct vec0_query_knn_data *knn_data) {
- if (!knn_data)
- return;
- if (knn_data->rowids) {
- sqlite3_free(knn_data->rowids);
- knn_data->rowids = NULL;
- }
- if (knn_data->distances) {
- sqlite3_free(knn_data->distances);
- knn_data->distances = NULL;
- }
- }
- struct vec0_query_point_data {
- i64 rowid;
- void *vectors[VEC0_MAX_VECTOR_COLUMNS];
- int done;
- };
- void vec0_query_point_data_clear(struct vec0_query_point_data *point_data) {
- if (!point_data)
- return;
- for (int i = 0; i < VEC0_MAX_VECTOR_COLUMNS; i++) {
- sqlite3_free(point_data->vectors[i]);
- point_data->vectors[i] = NULL;
- }
- }
// Query plan identifiers, encoded as printable characters.
typedef enum {
  // Keep the ARCHITECTURE.md docs in sync whenever one of these values changes!
  VEC0_QUERY_PLAN_FULLSCAN = '1',
  VEC0_QUERY_PLAN_POINT = '2',
  VEC0_QUERY_PLAN_KNN = '3',
} vec0_query_plan;
- typedef struct vec0_cursor vec0_cursor;
- struct vec0_cursor {
- sqlite3_vtab_cursor base;
- vec0_query_plan query_plan;
- struct vec0_query_fullscan_data *fullscan_data;
- struct vec0_query_knn_data *knn_data;
- struct vec0_query_point_data *point_data;
- };
- void vec0_cursor_clear(vec0_cursor *pCur) {
- if (pCur->fullscan_data) {
- vec0_query_fullscan_data_clear(pCur->fullscan_data);
- sqlite3_free(pCur->fullscan_data);
- pCur->fullscan_data = NULL;
- }
- if (pCur->knn_data) {
- vec0_query_knn_data_clear(pCur->knn_data);
- sqlite3_free(pCur->knn_data);
- pCur->knn_data = NULL;
- }
- if (pCur->point_data) {
- vec0_query_point_data_clear(pCur->point_data);
- sqlite3_free(pCur->point_data);
- pCur->point_data = NULL;
- }
- }
- #define VEC_CONSTRUCTOR_ERROR "vec0 constructor error: "
- static int vec0_init(sqlite3 *db, void *pAux, int argc, const char *const *argv,
- sqlite3_vtab **ppVtab, char **pzErr, bool isCreate) {
- UNUSED_PARAMETER(pAux);
- vec0_vtab *pNew;
- int rc;
- const char *zSql;
- pNew = sqlite3_malloc(sizeof(*pNew));
- if (pNew == 0)
- return SQLITE_NOMEM;
- memset(pNew, 0, sizeof(*pNew));
- // Declared chunk_size=N for entire table.
- // -1 to use the defualt, otherwise will get re-assigned on `chunk_size=N`
- // option
- int chunk_size = -1;
- int numVectorColumns = 0;
- int numPartitionColumns = 0;
- int numAuxiliaryColumns = 0;
- int numMetadataColumns = 0;
- int user_column_idx = 0;
- // track if a "primary key" column is defined
- char *pkColumnName = NULL;
- int pkColumnNameLength;
- int pkColumnType = SQLITE_INTEGER;
- for (int i = 3; i < argc; i++) {
- struct VectorColumnDefinition vecColumn;
- struct Vec0PartitionColumnDefinition partitionColumn;
- struct Vec0AuxiliaryColumnDefinition auxColumn;
- struct Vec0MetadataColumnDefinition metadataColumn;
- char *cName = NULL;
- int cNameLength;
- int cType;
- // Scenario #1: Constructor argument is a vector column definition, ie `foo float[1024]`
- rc = vec0_parse_vector_column(argv[i], strlen(argv[i]), &vecColumn);
- if (rc == SQLITE_ERROR) {
- *pzErr = sqlite3_mprintf(
- VEC_CONSTRUCTOR_ERROR "could not parse vector column '%s'", argv[i]);
- goto error;
- }
- if (rc == SQLITE_OK) {
- if (numVectorColumns >= VEC0_MAX_VECTOR_COLUMNS) {
- sqlite3_free(vecColumn.name);
- *pzErr = sqlite3_mprintf(VEC_CONSTRUCTOR_ERROR
- "Too many provided vector columns, maximum %d",
- VEC0_MAX_VECTOR_COLUMNS);
- goto error;
- }
- if (vecColumn.dimensions > SQLITE_VEC_VEC0_MAX_DIMENSIONS) {
- sqlite3_free(vecColumn.name);
- *pzErr = sqlite3_mprintf(
- VEC_CONSTRUCTOR_ERROR
- "Dimension on vector column too large, provided %lld, maximum %lld",
- (i64)vecColumn.dimensions, SQLITE_VEC_VEC0_MAX_DIMENSIONS);
- goto error;
- }
- pNew->user_column_kinds[user_column_idx] = SQLITE_VEC0_USER_COLUMN_KIND_VECTOR;
- pNew->user_column_idxs[user_column_idx] = numVectorColumns;
- memcpy(&pNew->vector_columns[numVectorColumns], &vecColumn, sizeof(vecColumn));
- numVectorColumns++;
- user_column_idx++;
- continue;
- }
- // Scenario #2: Constructor argument is a partition key column definition, ie `user_id text partition key`
- rc = vec0_parse_partition_key_definition(argv[i], strlen(argv[i]), &cName,
- &cNameLength, &cType);
- if (rc == SQLITE_OK) {
- if (numPartitionColumns >= VEC0_MAX_PARTITION_COLUMNS) {
- *pzErr = sqlite3_mprintf(
- VEC_CONSTRUCTOR_ERROR
- "More than %d partition key columns were provided",
- VEC0_MAX_PARTITION_COLUMNS);
- goto error;
- }
- partitionColumn.type = cType;
- partitionColumn.name_length = cNameLength;
- partitionColumn.name = sqlite3_mprintf("%.*s", cNameLength, cName);
- if(!partitionColumn.name) {
- rc = SQLITE_NOMEM;
- goto error;
- }
- pNew->user_column_kinds[user_column_idx] = SQLITE_VEC0_USER_COLUMN_KIND_PARTITION;
- pNew->user_column_idxs[user_column_idx] = numPartitionColumns;
- memcpy(&pNew->paritition_columns[numPartitionColumns], &partitionColumn, sizeof(partitionColumn));
- numPartitionColumns++;
- user_column_idx++;
- continue;
- }
- // Scenario #3: Constructor argument is a primary key column definition, ie `article_id text primary key`
- rc = vec0_parse_primary_key_definition(argv[i], strlen(argv[i]), &cName,
- &cNameLength, &cType);
- if (rc == SQLITE_OK) {
- if (pkColumnName) {
- *pzErr = sqlite3_mprintf(
- VEC_CONSTRUCTOR_ERROR
- "More than one primary key definition was provided, vec0 only "
- "suports a single primary key column",
- argv[i]);
- goto error;
- }
- pkColumnName = cName;
- pkColumnNameLength = cNameLength;
- pkColumnType = cType;
- continue;
- }
- // Scenario #4: Constructor argument is a auxiliary column definition, ie `+contents text`
- rc = vec0_parse_auxiliary_column_definition(argv[i], strlen(argv[i]), &cName,
- &cNameLength, &cType);
- if(rc == SQLITE_OK) {
- if (numAuxiliaryColumns >= VEC0_MAX_AUXILIARY_COLUMNS) {
- *pzErr = sqlite3_mprintf(
- VEC_CONSTRUCTOR_ERROR
- "More than %d auxiliary columns were provided",
- VEC0_MAX_AUXILIARY_COLUMNS);
- goto error;
- }
- auxColumn.type = cType;
- auxColumn.name_length = cNameLength;
- auxColumn.name = sqlite3_mprintf("%.*s", cNameLength, cName);
- if(!auxColumn.name) {
- rc = SQLITE_NOMEM;
- goto error;
- }
- pNew->user_column_kinds[user_column_idx] = SQLITE_VEC0_USER_COLUMN_KIND_AUXILIARY;
- pNew->user_column_idxs[user_column_idx] = numAuxiliaryColumns;
- memcpy(&pNew->auxiliary_columns[numAuxiliaryColumns], &auxColumn, sizeof(auxColumn));
- numAuxiliaryColumns++;
- user_column_idx++;
- continue;
- }
- vec0_metadata_column_kind kind;
- rc = vec0_parse_metadata_column_definition(argv[i], strlen(argv[i]), &cName,
- &cNameLength, &kind);
- if(rc == SQLITE_OK) {
- if (numMetadataColumns >= VEC0_MAX_METADATA_COLUMNS) {
- *pzErr = sqlite3_mprintf(
- VEC_CONSTRUCTOR_ERROR
- "More than %d metadata columns were provided",
- VEC0_MAX_METADATA_COLUMNS);
- goto error;
- }
- metadataColumn.kind = kind;
- metadataColumn.name_length = cNameLength;
- metadataColumn.name = sqlite3_mprintf("%.*s", cNameLength, cName);
- if(!metadataColumn.name) {
- rc = SQLITE_NOMEM;
- goto error;
- }
- pNew->user_column_kinds[user_column_idx] = SQLITE_VEC0_USER_COLUMN_KIND_METADATA;
- pNew->user_column_idxs[user_column_idx] = numMetadataColumns;
- memcpy(&pNew->metadata_columns[numMetadataColumns], &metadataColumn, sizeof(metadataColumn));
- numMetadataColumns++;
- user_column_idx++;
- continue;
- }
- // Scenario #4: Constructor argument is a table-level option, ie `chunk_size`
- char *key;
- char *value;
- int keyLength, valueLength;
- rc = vec0_parse_table_option(argv[i], strlen(argv[i]), &key, &keyLength,
- &value, &valueLength);
- if (rc == SQLITE_ERROR) {
- *pzErr = sqlite3_mprintf(
- VEC_CONSTRUCTOR_ERROR "could not parse table option '%s'", argv[i]);
- goto error;
- }
- if (rc == SQLITE_OK) {
- if (sqlite3_strnicmp(key, "chunk_size", keyLength) == 0) {
- chunk_size = atoi(value);
- if (chunk_size <= 0) {
- // IMP: V01931_18769
- *pzErr =
- sqlite3_mprintf(VEC_CONSTRUCTOR_ERROR
- "chunk_size must be a non-zero positive integer");
- goto error;
- }
- if ((chunk_size % 8) != 0) {
- // IMP: V14110_30948
- *pzErr = sqlite3_mprintf(VEC_CONSTRUCTOR_ERROR
- "chunk_size must be divisible by 8");
- goto error;
- }
- #define SQLITE_VEC_CHUNK_SIZE_MAX 4096
- if (chunk_size > SQLITE_VEC_CHUNK_SIZE_MAX) {
- *pzErr =
- sqlite3_mprintf(VEC_CONSTRUCTOR_ERROR "chunk_size too large");
- goto error;
- }
- } else {
- // IMP: V27642_11712
- *pzErr = sqlite3_mprintf(
- VEC_CONSTRUCTOR_ERROR "Unknown table option: %.*s", keyLength, key);
- goto error;
- }
- continue;
- }
- // Scenario #5: Unknown constructor argument
- *pzErr =
- sqlite3_mprintf(VEC_CONSTRUCTOR_ERROR "Could not parse '%s'", argv[i]);
- goto error;
- }
- if (chunk_size < 0) {
- chunk_size = 1024;
- }
- if (numVectorColumns <= 0) {
- *pzErr = sqlite3_mprintf(VEC_CONSTRUCTOR_ERROR
- "At least one vector column is required");
- goto error;
- }
- sqlite3_str *createStr = sqlite3_str_new(NULL);
- sqlite3_str_appendall(createStr, "CREATE TABLE x(");
- if (pkColumnName) {
- sqlite3_str_appendf(createStr, "\"%.*w\" primary key, ", pkColumnNameLength,
- pkColumnName);
- } else {
- sqlite3_str_appendall(createStr, "rowid, ");
- }
- for (int i = 0; i < numVectorColumns + numPartitionColumns + numAuxiliaryColumns + numMetadataColumns; i++) {
- switch(pNew->user_column_kinds[i]) {
- case SQLITE_VEC0_USER_COLUMN_KIND_VECTOR: {
- int vector_idx = pNew->user_column_idxs[i];
- sqlite3_str_appendf(createStr, "\"%.*w\", ",
- pNew->vector_columns[vector_idx].name_length,
- pNew->vector_columns[vector_idx].name);
- break;
- }
- case SQLITE_VEC0_USER_COLUMN_KIND_PARTITION: {
- int partition_idx = pNew->user_column_idxs[i];
- sqlite3_str_appendf(createStr, "\"%.*w\", ",
- pNew->paritition_columns[partition_idx].name_length,
- pNew->paritition_columns[partition_idx].name);
- break;
- }
- case SQLITE_VEC0_USER_COLUMN_KIND_AUXILIARY: {
- int auxiliary_idx = pNew->user_column_idxs[i];
- sqlite3_str_appendf(createStr, "\"%.*w\", ",
- pNew->auxiliary_columns[auxiliary_idx].name_length,
- pNew->auxiliary_columns[auxiliary_idx].name);
- break;
- }
- case SQLITE_VEC0_USER_COLUMN_KIND_METADATA: {
- int metadata_idx = pNew->user_column_idxs[i];
- sqlite3_str_appendf(createStr, "\"%.*w\", ",
- pNew->metadata_columns[metadata_idx].name_length,
- pNew->metadata_columns[metadata_idx].name);
- break;
- }
- }
- }
- sqlite3_str_appendall(createStr, " distance hidden, k hidden) ");
- if (pkColumnName) {
- sqlite3_str_appendall(createStr, "without rowid ");
- }
- zSql = sqlite3_str_finish(createStr);
- if (!zSql) {
- goto error;
- }
- rc = sqlite3_declare_vtab(db, zSql);
- sqlite3_free((void *)zSql);
- if (rc != SQLITE_OK) {
- *pzErr = sqlite3_mprintf(VEC_CONSTRUCTOR_ERROR
- "could not declare virtual table, '%s'",
- sqlite3_errmsg(db));
- goto error;
- }
- const char *schemaName = argv[1];
- const char *tableName = argv[2];
- pNew->db = db;
- pNew->pkIsText = pkColumnType == SQLITE_TEXT;
- pNew->schemaName = sqlite3_mprintf("%s", schemaName);
- if (!pNew->schemaName) {
- goto error;
- }
- pNew->tableName = sqlite3_mprintf("%s", tableName);
- if (!pNew->tableName) {
- goto error;
- }
- pNew->shadowRowidsName = sqlite3_mprintf("%s_rowids", tableName);
- if (!pNew->shadowRowidsName) {
- goto error;
- }
- pNew->shadowChunksName = sqlite3_mprintf("%s_chunks", tableName);
- if (!pNew->shadowChunksName) {
- goto error;
- }
- pNew->numVectorColumns = numVectorColumns;
- pNew->numPartitionColumns = numPartitionColumns;
- pNew->numAuxiliaryColumns = numAuxiliaryColumns;
- pNew->numMetadataColumns = numMetadataColumns;
- for (int i = 0; i < pNew->numVectorColumns; i++) {
- pNew->shadowVectorChunksNames[i] =
- sqlite3_mprintf("%s_vector_chunks%02d", tableName, i);
- if (!pNew->shadowVectorChunksNames[i]) {
- goto error;
- }
- }
- for (int i = 0; i < pNew->numMetadataColumns; i++) {
- pNew->shadowMetadataChunksNames[i] =
- sqlite3_mprintf("%s_metadatachunks%02d", tableName, i);
- if (!pNew->shadowMetadataChunksNames[i]) {
- goto error;
- }
- }
- pNew->chunk_size = chunk_size;
- // if xCreate, then create the necessary shadow tables
- if (isCreate) {
- sqlite3_stmt *stmt;
- int rc;
- char * zCreateInfo = sqlite3_mprintf("CREATE TABLE "VEC0_SHADOW_INFO_NAME " (key text primary key, value any)", pNew->schemaName, pNew->tableName);
- if(!zCreateInfo) {
- goto error;
- }
- rc = sqlite3_prepare_v2(db, zCreateInfo, -1, &stmt, NULL);
- sqlite3_free((void *) zCreateInfo);
- if ((rc != SQLITE_OK) || (sqlite3_step(stmt) != SQLITE_DONE)) {
- // TODO(IMP)
- sqlite3_finalize(stmt);
- *pzErr = sqlite3_mprintf("Could not create '_info' shadow table: %s",
- sqlite3_errmsg(db));
- goto error;
- }
- sqlite3_finalize(stmt);
- char * zSeedInfo = sqlite3_mprintf(
- "INSERT INTO "VEC0_SHADOW_INFO_NAME "(key, value) VALUES "
- "(?1, ?2), (?3, ?4), (?5, ?6), (?7, ?8) ",
- pNew->schemaName, pNew->tableName
- );
- if(!zSeedInfo) {
- goto error;
- }
- rc = sqlite3_prepare_v2(db, zSeedInfo, -1, &stmt, NULL);
- sqlite3_free((void *) zSeedInfo);
- if (rc != SQLITE_OK) {
- // TODO(IMP)
- sqlite3_finalize(stmt);
- *pzErr = sqlite3_mprintf("Could not seed '_info' shadow table: %s",
- sqlite3_errmsg(db));
- goto error;
- }
- sqlite3_bind_text(stmt, 1, "CREATE_VERSION", -1, SQLITE_STATIC);
- sqlite3_bind_text(stmt, 2, SQLITE_VEC_VERSION, -1, SQLITE_STATIC);
- sqlite3_bind_text(stmt, 3, "CREATE_VERSION_MAJOR", -1, SQLITE_STATIC);
- sqlite3_bind_int(stmt, 4, SQLITE_VEC_VERSION_MAJOR);
- sqlite3_bind_text(stmt, 5, "CREATE_VERSION_MINOR", -1, SQLITE_STATIC);
- sqlite3_bind_int(stmt, 6, SQLITE_VEC_VERSION_MINOR);
- sqlite3_bind_text(stmt, 7, "CREATE_VERSION_PATCH", -1, SQLITE_STATIC);
- sqlite3_bind_int(stmt, 8, SQLITE_VEC_VERSION_PATCH);
- if(sqlite3_step(stmt) != SQLITE_DONE) {
- // TODO(IMP)
- sqlite3_finalize(stmt);
- *pzErr = sqlite3_mprintf("Could not seed '_info' shadow table: %s",
- sqlite3_errmsg(db));
- goto error;
- }
- sqlite3_finalize(stmt);
- // create the _chunks shadow table
- char *zCreateShadowChunks = NULL;
- if(pNew->numPartitionColumns) {
- sqlite3_str * s = sqlite3_str_new(NULL);
- sqlite3_str_appendf(s, "CREATE TABLE " VEC0_SHADOW_CHUNKS_NAME "(", pNew->schemaName, pNew->tableName);
- sqlite3_str_appendall(s, "chunk_id INTEGER PRIMARY KEY AUTOINCREMENT," "size INTEGER NOT NULL,");
- sqlite3_str_appendall(s, "sequence_id integer,");
- for(int i = 0; i < pNew->numPartitionColumns;i++) {
- sqlite3_str_appendf(s, "partition%02d,", i);
- }
- sqlite3_str_appendall(s, "validity BLOB NOT NULL, rowids BLOB NOT NULL);");
- zCreateShadowChunks = sqlite3_str_finish(s);
- }else {
- zCreateShadowChunks = sqlite3_mprintf(VEC0_SHADOW_CHUNKS_CREATE,
- pNew->schemaName, pNew->tableName);
- }
- if (!zCreateShadowChunks) {
- goto error;
- }
- rc = sqlite3_prepare_v2(db, zCreateShadowChunks, -1, &stmt, 0);
- sqlite3_free((void *)zCreateShadowChunks);
- if ((rc != SQLITE_OK) || (sqlite3_step(stmt) != SQLITE_DONE)) {
- // IMP: V17740_01811
- sqlite3_finalize(stmt);
- *pzErr = sqlite3_mprintf("Could not create '_chunks' shadow table: %s",
- sqlite3_errmsg(db));
- goto error;
- }
- sqlite3_finalize(stmt);
- // create the _rowids shadow table
- char *zCreateShadowRowids;
- if (pNew->pkIsText) {
- // adds a "text unique not null" constraint to the id column
- zCreateShadowRowids = sqlite3_mprintf(VEC0_SHADOW_ROWIDS_CREATE_PK_TEXT,
- pNew->schemaName, pNew->tableName);
- } else {
- zCreateShadowRowids = sqlite3_mprintf(VEC0_SHADOW_ROWIDS_CREATE_BASIC,
- pNew->schemaName, pNew->tableName);
- }
- if (!zCreateShadowRowids) {
- goto error;
- }
- rc = sqlite3_prepare_v2(db, zCreateShadowRowids, -1, &stmt, 0);
- sqlite3_free((void *)zCreateShadowRowids);
- if ((rc != SQLITE_OK) || (sqlite3_step(stmt) != SQLITE_DONE)) {
- // IMP: V11631_28470
- sqlite3_finalize(stmt);
- *pzErr = sqlite3_mprintf("Could not create '_rowids' shadow table: %s",
- sqlite3_errmsg(db));
- goto error;
- }
- sqlite3_finalize(stmt);
- for (int i = 0; i < pNew->numVectorColumns; i++) {
- char *zSql = sqlite3_mprintf(VEC0_SHADOW_VECTOR_N_CREATE,
- pNew->schemaName, pNew->tableName, i);
- if (!zSql) {
- goto error;
- }
- rc = sqlite3_prepare_v2(db, zSql, -1, &stmt, 0);
- sqlite3_free((void *)zSql);
- if ((rc != SQLITE_OK) || (sqlite3_step(stmt) != SQLITE_DONE)) {
- // IMP: V25919_09989
- sqlite3_finalize(stmt);
- *pzErr = sqlite3_mprintf(
- "Could not create '_vector_chunks%02d' shadow table: %s", i,
- sqlite3_errmsg(db));
- goto error;
- }
- sqlite3_finalize(stmt);
- }
- for (int i = 0; i < pNew->numMetadataColumns; i++) {
- char *zSql = sqlite3_mprintf("CREATE TABLE " VEC0_SHADOW_METADATA_N_NAME "(rowid PRIMARY KEY, data BLOB NOT NULL);",
- pNew->schemaName, pNew->tableName, i);
- if (!zSql) {
- goto error;
- }
- rc = sqlite3_prepare_v2(db, zSql, -1, &stmt, 0);
- sqlite3_free((void *)zSql);
- if ((rc != SQLITE_OK) || (sqlite3_step(stmt) != SQLITE_DONE)) {
- sqlite3_finalize(stmt);
- *pzErr = sqlite3_mprintf(
- "Could not create '_metata_chunks%02d' shadow table: %s", i,
- sqlite3_errmsg(db));
- goto error;
- }
- sqlite3_finalize(stmt);
- if(pNew->metadata_columns[i].kind == VEC0_METADATA_COLUMN_KIND_TEXT) {
- char *zSql = sqlite3_mprintf("CREATE TABLE " VEC0_SHADOW_METADATA_TEXT_DATA_NAME "(rowid PRIMARY KEY, data TEXT);",
- pNew->schemaName, pNew->tableName, i);
- if (!zSql) {
- goto error;
- }
- rc = sqlite3_prepare_v2(db, zSql, -1, &stmt, 0);
- sqlite3_free((void *)zSql);
- if ((rc != SQLITE_OK) || (sqlite3_step(stmt) != SQLITE_DONE)) {
- sqlite3_finalize(stmt);
- *pzErr = sqlite3_mprintf(
- "Could not create '_metadatatext%02d' shadow table: %s", i,
- sqlite3_errmsg(db));
- goto error;
- }
- sqlite3_finalize(stmt);
- }
- }
- if(pNew->numAuxiliaryColumns > 0) {
- sqlite3_stmt * stmt;
- sqlite3_str * s = sqlite3_str_new(NULL);
- sqlite3_str_appendf(s, "CREATE TABLE " VEC0_SHADOW_AUXILIARY_NAME "( rowid integer PRIMARY KEY ", pNew->schemaName, pNew->tableName);
- for(int i = 0; i < pNew->numAuxiliaryColumns; i++) {
- sqlite3_str_appendf(s, ", value%02d", i);
- }
- sqlite3_str_appendall(s, ")");
- char *zSql = sqlite3_str_finish(s);
- if(!zSql) {
- goto error;
- }
- rc = sqlite3_prepare_v2(db, zSql, -1, &stmt, NULL);
- if ((rc != SQLITE_OK) || (sqlite3_step(stmt) != SQLITE_DONE)) {
- sqlite3_finalize(stmt);
- *pzErr = sqlite3_mprintf(
- "Could not create auxiliary shadow table: %s",
- sqlite3_errmsg(db));
- goto error;
- }
- sqlite3_finalize(stmt);
- }
- }
- *ppVtab = (sqlite3_vtab *)pNew;
- return SQLITE_OK;
- error:
- vec0_free(pNew);
- return SQLITE_ERROR;
- }
- static int vec0Create(sqlite3 *db, void *pAux, int argc,
- const char *const *argv, sqlite3_vtab **ppVtab,
- char **pzErr) {
- return vec0_init(db, pAux, argc, argv, ppVtab, pzErr, true);
- }
- static int vec0Connect(sqlite3 *db, void *pAux, int argc,
- const char *const *argv, sqlite3_vtab **ppVtab,
- char **pzErr) {
- return vec0_init(db, pAux, argc, argv, ppVtab, pzErr, false);
- }
- static int vec0Disconnect(sqlite3_vtab *pVtab) {
- vec0_vtab *p = (vec0_vtab *)pVtab;
- vec0_free(p);
- sqlite3_free(p);
- return SQLITE_OK;
- }
- static int vec0Destroy(sqlite3_vtab *pVtab) {
- vec0_vtab *p = (vec0_vtab *)pVtab;
- sqlite3_stmt *stmt;
- int rc;
- const char *zSql;
- // Free up any sqlite3_stmt, otherwise DROPs on those tables will fail
- vec0_free_resources(p);
- // TODO(test) later: can't evidence-of here, bc always gives "SQL logic error" instead of
- // provided error
- zSql = sqlite3_mprintf("DROP TABLE " VEC0_SHADOW_CHUNKS_NAME, p->schemaName,
- p->tableName);
- rc = sqlite3_prepare_v2(p->db, zSql, -1, &stmt, 0);
- sqlite3_free((void *)zSql);
- if ((rc != SQLITE_OK) || (sqlite3_step(stmt) != SQLITE_DONE)) {
- rc = SQLITE_ERROR;
- vtab_set_error(pVtab, "could not drop chunks shadow table");
- goto done;
- }
- sqlite3_finalize(stmt);
- zSql = sqlite3_mprintf("DROP TABLE " VEC0_SHADOW_INFO_NAME, p->schemaName,
- p->tableName);
- rc = sqlite3_prepare_v2(p->db, zSql, -1, &stmt, 0);
- sqlite3_free((void *)zSql);
- if ((rc != SQLITE_OK) || (sqlite3_step(stmt) != SQLITE_DONE)) {
- rc = SQLITE_ERROR;
- vtab_set_error(pVtab, "could not drop info shadow table");
- goto done;
- }
- sqlite3_finalize(stmt);
- zSql = sqlite3_mprintf("DROP TABLE " VEC0_SHADOW_ROWIDS_NAME, p->schemaName,
- p->tableName);
- rc = sqlite3_prepare_v2(p->db, zSql, -1, &stmt, 0);
- sqlite3_free((void *)zSql);
- if ((rc != SQLITE_OK) || (sqlite3_step(stmt) != SQLITE_DONE)) {
- rc = SQLITE_ERROR;
- goto done;
- }
- sqlite3_finalize(stmt);
- for (int i = 0; i < p->numVectorColumns; i++) {
- zSql = sqlite3_mprintf("DROP TABLE \"%w\".\"%w\"", p->schemaName,
- p->shadowVectorChunksNames[i]);
- rc = sqlite3_prepare_v2(p->db, zSql, -1, &stmt, 0);
- sqlite3_free((void *)zSql);
- if ((rc != SQLITE_OK) || (sqlite3_step(stmt) != SQLITE_DONE)) {
- rc = SQLITE_ERROR;
- goto done;
- }
- sqlite3_finalize(stmt);
- }
- if(p->numAuxiliaryColumns > 0) {
- zSql = sqlite3_mprintf("DROP TABLE " VEC0_SHADOW_AUXILIARY_NAME, p->schemaName, p->tableName);
- rc = sqlite3_prepare_v2(p->db, zSql, -1, &stmt, 0);
- sqlite3_free((void *)zSql);
- if ((rc != SQLITE_OK) || (sqlite3_step(stmt) != SQLITE_DONE)) {
- rc = SQLITE_ERROR;
- goto done;
- }
- sqlite3_finalize(stmt);
- }
- for (int i = 0; i < p->numMetadataColumns; i++) {
- zSql = sqlite3_mprintf("DROP TABLE " VEC0_SHADOW_METADATA_N_NAME, p->schemaName,p->tableName, i);
- rc = sqlite3_prepare_v2(p->db, zSql, -1, &stmt, 0);
- sqlite3_free((void *)zSql);
- if ((rc != SQLITE_OK) || (sqlite3_step(stmt) != SQLITE_DONE)) {
- rc = SQLITE_ERROR;
- goto done;
- }
- sqlite3_finalize(stmt);
- if(p->metadata_columns[i].kind == VEC0_METADATA_COLUMN_KIND_TEXT) {
- zSql = sqlite3_mprintf("DROP TABLE " VEC0_SHADOW_METADATA_TEXT_DATA_NAME, p->schemaName,p->tableName, i);
- rc = sqlite3_prepare_v2(p->db, zSql, -1, &stmt, 0);
- sqlite3_free((void *)zSql);
- if ((rc != SQLITE_OK) || (sqlite3_step(stmt) != SQLITE_DONE)) {
- rc = SQLITE_ERROR;
- goto done;
- }
- sqlite3_finalize(stmt);
- }
- }
- stmt = NULL;
- rc = SQLITE_OK;
- done:
- sqlite3_finalize(stmt);
- vec0_free(p);
- // If there was an error
- if (rc == SQLITE_OK) {
- sqlite3_free(p);
- }
- return rc;
- }
- static int vec0Open(sqlite3_vtab *p, sqlite3_vtab_cursor **ppCursor) {
- UNUSED_PARAMETER(p);
- vec0_cursor *pCur;
- pCur = sqlite3_malloc(sizeof(*pCur));
- if (pCur == 0)
- return SQLITE_NOMEM;
- memset(pCur, 0, sizeof(*pCur));
- *ppCursor = &pCur->base;
- return SQLITE_OK;
- }
- static int vec0Close(sqlite3_vtab_cursor *cur) {
- vec0_cursor *pCur = (vec0_cursor *)cur;
- vec0_cursor_clear(pCur);
- sqlite3_free(pCur);
- return SQLITE_OK;
- }
// Every kind of "value" that can be passed through argv/argc to vec0Filter.
// Each enumerator names the role one such value plays.
typedef enum {
  // Keep the ARCHITECTURE.md docs in sync whenever one of these values changes!

  VEC0_IDXSTR_KIND_KNN_MATCH = '{',
  VEC0_IDXSTR_KIND_KNN_K = '}',
  VEC0_IDXSTR_KIND_KNN_ROWID_IN = '[',
  VEC0_IDXSTR_KIND_KNN_PARTITON_CONSTRAINT = ']',
  VEC0_IDXSTR_KIND_POINT_ID = '!',
  VEC0_IDXSTR_KIND_METADATA_CONSTRAINT = '&',
} vec0_idxstr_kind;
// The SQLITE_INDEX_CONSTRAINT operators that vec0 partition key columns
// support, re-encoded as single characters so they fit nicely in idxstr.
typedef enum {
  // Keep the ARCHITECTURE.md docs in sync whenever one of these values changes!

  VEC0_PARTITION_OPERATOR_EQ = 'a',
  VEC0_PARTITION_OPERATOR_GT = 'b',
  VEC0_PARTITION_OPERATOR_LE = 'c',
  VEC0_PARTITION_OPERATOR_LT = 'd',
  VEC0_PARTITION_OPERATOR_GE = 'e',
  VEC0_PARTITION_OPERATOR_NE = 'f',
} vec0_partition_operator;
// Single-character operator codes for constraints on metadata columns. The
// first six use the same characters as vec0_partition_operator; IN is
// additional.
typedef enum {
  VEC0_METADATA_OPERATOR_EQ = 'a',
  VEC0_METADATA_OPERATOR_GT = 'b',
  VEC0_METADATA_OPERATOR_LE = 'c',
  VEC0_METADATA_OPERATOR_LT = 'd',
  VEC0_METADATA_OPERATOR_GE = 'e',
  VEC0_METADATA_OPERATOR_NE = 'f',
  VEC0_METADATA_OPERATOR_IN = 'g',
} vec0_metadata_operator;
- static int vec0BestIndex(sqlite3_vtab *pVTab, sqlite3_index_info *pIdxInfo) {
- vec0_vtab *p = (vec0_vtab *)pVTab;
- /**
- * Possible query plans are:
- * 1. KNN when:
- * a) An `MATCH` op on vector column
- * b) ORDER BY on distance column
- * c) LIMIT
- * d) rowid in (...) OPTIONAL
- * 2. Point when:
- * a) An `EQ` op on rowid column
- * 3. else: fullscan
- *
- */
- int iMatchTerm = -1;
- int iMatchVectorTerm = -1;
- int iLimitTerm = -1;
- int iRowidTerm = -1;
- int iKTerm = -1;
- int iRowidInTerm = -1;
- int hasAuxConstraint = 0;
- #ifdef SQLITE_VEC_DEBUG
- printf("pIdxInfo->nOrderBy=%d, pIdxInfo->nConstraint=%d\n", pIdxInfo->nOrderBy, pIdxInfo->nConstraint);
- #endif
- for (int i = 0; i < pIdxInfo->nConstraint; i++) {
- u8 vtabIn = 0;
- #if COMPILER_SUPPORTS_VTAB_IN
- if (sqlite3_libversion_number() >= 3038000) {
- vtabIn = sqlite3_vtab_in(pIdxInfo, i, -1);
- }
- #endif
- #ifdef SQLITE_VEC_DEBUG
- printf("xBestIndex [%d] usable=%d iColumn=%d op=%d vtabin=%d\n", i,
- pIdxInfo->aConstraint[i].usable, pIdxInfo->aConstraint[i].iColumn,
- pIdxInfo->aConstraint[i].op, vtabIn);
- #endif
- if (!pIdxInfo->aConstraint[i].usable)
- continue;
- int iColumn = pIdxInfo->aConstraint[i].iColumn;
- int op = pIdxInfo->aConstraint[i].op;
- if (op == SQLITE_INDEX_CONSTRAINT_LIMIT) {
- iLimitTerm = i;
- }
- if (op == SQLITE_INDEX_CONSTRAINT_MATCH &&
- vec0_column_idx_is_vector(p, iColumn)) {
- if (iMatchTerm > -1) {
- vtab_set_error(
- pVTab, "only 1 MATCH operator is allowed in a single vec0 query");
- return SQLITE_ERROR;
- }
- iMatchTerm = i;
- iMatchVectorTerm = vec0_column_idx_to_vector_idx(p, iColumn);
- }
- if (op == SQLITE_INDEX_CONSTRAINT_EQ && iColumn == VEC0_COLUMN_ID) {
- if (vtabIn) {
- if (iRowidInTerm != -1) {
- vtab_set_error(pVTab, "only 1 'rowid in (..)' operator is allowed in "
- "a single vec0 query");
- return SQLITE_ERROR;
- }
- iRowidInTerm = i;
- } else {
- iRowidTerm = i;
- }
- }
- if (op == SQLITE_INDEX_CONSTRAINT_EQ && iColumn == vec0_column_k_idx(p)) {
- iKTerm = i;
- }
- if(
- (op != SQLITE_INDEX_CONSTRAINT_LIMIT && op != SQLITE_INDEX_CONSTRAINT_OFFSET)
- && vec0_column_idx_is_auxiliary(p, iColumn)) {
- hasAuxConstraint = 1;
- }
- }
- sqlite3_str *idxStr = sqlite3_str_new(NULL);
- int rc;
- if (iMatchTerm >= 0) {
- if (iLimitTerm < 0 && iKTerm < 0) {
- vtab_set_error(
- pVTab,
- "A LIMIT or 'k = ?' constraint is required on vec0 knn queries.");
- rc = SQLITE_ERROR;
- goto done;
- }
- if (iLimitTerm >= 0 && iKTerm >= 0) {
- vtab_set_error(pVTab, "Only LIMIT or 'k =?' can be provided, not both");
- rc = SQLITE_ERROR;
- goto done;
- }
- if (pIdxInfo->nOrderBy) {
- if (pIdxInfo->nOrderBy > 1) {
- vtab_set_error(pVTab, "Only a single 'ORDER BY distance' clause is "
- "allowed on vec0 KNN queries");
- rc = SQLITE_ERROR;
- goto done;
- }
- if (pIdxInfo->aOrderBy[0].iColumn != vec0_column_distance_idx(p)) {
- vtab_set_error(pVTab,
- "Only a single 'ORDER BY distance' clause is allowed on "
- "vec0 KNN queries, not on other columns");
- rc = SQLITE_ERROR;
- goto done;
- }
- if (pIdxInfo->aOrderBy[0].desc) {
- vtab_set_error(
- pVTab, "Only ascending in ORDER BY distance clause is supported, "
- "DESC is not supported yet.");
- rc = SQLITE_ERROR;
- goto done;
- }
- }
- if(hasAuxConstraint) {
- // IMP: V25623_09693
- vtab_set_error(pVTab, "An illegal WHERE constraint was provided on a vec0 auxiliary column in a KNN query.");
- rc = SQLITE_ERROR;
- goto done;
- }
- sqlite3_str_appendchar(idxStr, 1, VEC0_QUERY_PLAN_KNN);
- int argvIndex = 1;
- pIdxInfo->aConstraintUsage[iMatchTerm].argvIndex = argvIndex++;
- pIdxInfo->aConstraintUsage[iMatchTerm].omit = 1;
- sqlite3_str_appendchar(idxStr, 1, VEC0_IDXSTR_KIND_KNN_MATCH);
- sqlite3_str_appendchar(idxStr, 3, '_');
- if (iLimitTerm >= 0) {
- pIdxInfo->aConstraintUsage[iLimitTerm].argvIndex = argvIndex++;
- pIdxInfo->aConstraintUsage[iLimitTerm].omit = 1;
- } else {
- pIdxInfo->aConstraintUsage[iKTerm].argvIndex = argvIndex++;
- pIdxInfo->aConstraintUsage[iKTerm].omit = 1;
- }
- sqlite3_str_appendchar(idxStr, 1, VEC0_IDXSTR_KIND_KNN_K);
- sqlite3_str_appendchar(idxStr, 3, '_');
- #if COMPILER_SUPPORTS_VTAB_IN
- if (iRowidInTerm >= 0) {
- // already validated as >= SQLite 3.38 bc iRowidInTerm is only >= 0 when
- // vtabIn == 1
- sqlite3_vtab_in(pIdxInfo, iRowidInTerm, 1);
- pIdxInfo->aConstraintUsage[iRowidInTerm].argvIndex = argvIndex++;
- pIdxInfo->aConstraintUsage[iRowidInTerm].omit = 1;
- sqlite3_str_appendchar(idxStr, 1, VEC0_IDXSTR_KIND_KNN_ROWID_IN);
- sqlite3_str_appendchar(idxStr, 3, '_');
- }
- #endif
- for (int i = 0; i < pIdxInfo->nConstraint; i++) {
- if (!pIdxInfo->aConstraint[i].usable)
- continue;
- int iColumn = pIdxInfo->aConstraint[i].iColumn;
- int op = pIdxInfo->aConstraint[i].op;
- if(op == SQLITE_INDEX_CONSTRAINT_LIMIT || op == SQLITE_INDEX_CONSTRAINT_OFFSET) {
- continue;
- }
- if(!vec0_column_idx_is_partition(p, iColumn)) {
- continue;
- }
- int partition_idx = vec0_column_idx_to_partition_idx(p, iColumn);
- char value = 0;
- switch(op) {
- case SQLITE_INDEX_CONSTRAINT_EQ: {
- value = VEC0_PARTITION_OPERATOR_EQ;
- break;
- }
- case SQLITE_INDEX_CONSTRAINT_GT: {
- value = VEC0_PARTITION_OPERATOR_GT;
- break;
- }
- case SQLITE_INDEX_CONSTRAINT_LE: {
- value = VEC0_PARTITION_OPERATOR_LE;
- break;
- }
- case SQLITE_INDEX_CONSTRAINT_LT: {
- value = VEC0_PARTITION_OPERATOR_LT;
- break;
- }
- case SQLITE_INDEX_CONSTRAINT_GE: {
- value = VEC0_PARTITION_OPERATOR_GE;
- break;
- }
- case SQLITE_INDEX_CONSTRAINT_NE: {
- value = VEC0_PARTITION_OPERATOR_NE;
- break;
- }
- }
- if(value) {
- pIdxInfo->aConstraintUsage[i].argvIndex = argvIndex++;
- pIdxInfo->aConstraintUsage[i].omit = 1;
- sqlite3_str_appendchar(idxStr, 1, VEC0_IDXSTR_KIND_KNN_PARTITON_CONSTRAINT);
- sqlite3_str_appendchar(idxStr, 1, 'A' + partition_idx);
- sqlite3_str_appendchar(idxStr, 1, value);
- sqlite3_str_appendchar(idxStr, 1, '_');
- }
- }
- for (int i = 0; i < pIdxInfo->nConstraint; i++) {
- if (!pIdxInfo->aConstraint[i].usable)
- continue;
- int iColumn = pIdxInfo->aConstraint[i].iColumn;
- int op = pIdxInfo->aConstraint[i].op;
- if(op == SQLITE_INDEX_CONSTRAINT_LIMIT || op == SQLITE_INDEX_CONSTRAINT_OFFSET) {
- continue;
- }
- if(!vec0_column_idx_is_metadata(p, iColumn)) {
- continue;
- }
- int metadata_idx = vec0_column_idx_to_metadata_idx(p, iColumn);
- char value = 0;
- switch(op) {
- case SQLITE_INDEX_CONSTRAINT_EQ: {
- int vtabIn = 0;
- #if COMPILER_SUPPORTS_VTAB_IN
- if (sqlite3_libversion_number() >= 3038000) {
- vtabIn = sqlite3_vtab_in(pIdxInfo, i, -1);
- }
- if(vtabIn) {
- switch(p->metadata_columns[metadata_idx].kind) {
- case VEC0_METADATA_COLUMN_KIND_FLOAT:
- case VEC0_METADATA_COLUMN_KIND_BOOLEAN: {
- // IMP: V15248_32086
- rc = SQLITE_ERROR;
- vtab_set_error(pVTab, "'xxx in (...)' is only available on INTEGER or TEXT metadata columns.");
- goto done;
- break;
- }
- case VEC0_METADATA_COLUMN_KIND_INTEGER:
- case VEC0_METADATA_COLUMN_KIND_TEXT: {
- break;
- }
- }
- value = VEC0_METADATA_OPERATOR_IN;
- sqlite3_vtab_in(pIdxInfo, i, 1);
- }else
- #endif
- {
- value = VEC0_PARTITION_OPERATOR_EQ;
- }
- break;
- }
- case SQLITE_INDEX_CONSTRAINT_GT: {
- value = VEC0_METADATA_OPERATOR_GT;
- break;
- }
- case SQLITE_INDEX_CONSTRAINT_LE: {
- value = VEC0_METADATA_OPERATOR_LE;
- break;
- }
- case SQLITE_INDEX_CONSTRAINT_LT: {
- value = VEC0_METADATA_OPERATOR_LT;
- break;
- }
- case SQLITE_INDEX_CONSTRAINT_GE: {
- value = VEC0_METADATA_OPERATOR_GE;
- break;
- }
- case SQLITE_INDEX_CONSTRAINT_NE: {
- value = VEC0_METADATA_OPERATOR_NE;
- break;
- }
- default: {
- // IMP: V16511_00582
- rc = SQLITE_ERROR;
- vtab_set_error(pVTab,
- "An illegal WHERE constraint was provided on a vec0 metadata column in a KNN query. "
- "Only one of EQUALS, GREATER_THAN, LESS_THAN_OR_EQUAL, LESS_THAN, GREATER_THAN_OR_EQUAL, NOT_EQUALS is allowed."
- );
- goto done;
- }
- }
- if(p->metadata_columns[metadata_idx].kind == VEC0_METADATA_COLUMN_KIND_BOOLEAN) {
- if(!(value == VEC0_METADATA_OPERATOR_EQ || value == VEC0_METADATA_OPERATOR_NE)) {
- // IMP: V10145_26984
- rc = SQLITE_ERROR;
- vtab_set_error(pVTab, "ONLY EQUALS (=) or NOT_EQUALS (!=) operators are allowed on boolean metadata columns.");
- goto done;
- }
- }
- pIdxInfo->aConstraintUsage[i].argvIndex = argvIndex++;
- pIdxInfo->aConstraintUsage[i].omit = 1;
- sqlite3_str_appendchar(idxStr, 1, VEC0_IDXSTR_KIND_METADATA_CONSTRAINT);
- sqlite3_str_appendchar(idxStr, 1, 'A' + metadata_idx);
- sqlite3_str_appendchar(idxStr, 1, value);
- sqlite3_str_appendchar(idxStr, 1, '_');
- }
- pIdxInfo->idxNum = iMatchVectorTerm;
- pIdxInfo->estimatedCost = 30.0;
- pIdxInfo->estimatedRows = 10;
- } else if (iRowidTerm >= 0) {
- sqlite3_str_appendchar(idxStr, 1, VEC0_QUERY_PLAN_POINT);
- pIdxInfo->aConstraintUsage[iRowidTerm].argvIndex = 1;
- pIdxInfo->aConstraintUsage[iRowidTerm].omit = 1;
- sqlite3_str_appendchar(idxStr, 1, VEC0_IDXSTR_KIND_POINT_ID);
- sqlite3_str_appendchar(idxStr, 3, '_');
- pIdxInfo->idxNum = pIdxInfo->colUsed;
- pIdxInfo->estimatedCost = 10.0;
- pIdxInfo->estimatedRows = 1;
- } else {
- sqlite3_str_appendchar(idxStr, 1, VEC0_QUERY_PLAN_FULLSCAN);
- pIdxInfo->estimatedCost = 3000000.0;
- pIdxInfo->estimatedRows = 100000;
- }
- pIdxInfo->idxStr = sqlite3_str_finish(idxStr);
- idxStr = NULL;
- if (!pIdxInfo->idxStr) {
- rc = SQLITE_OK;
- goto done;
- }
- pIdxInfo->needToFreeIdxStr = 1;
- rc = SQLITE_OK;
- done:
- if(idxStr) {
- sqlite3_str_finish(idxStr);
- }
- return rc;
- }
- // forward declaration because vec0Filter uses it
- static int vec0Next(sqlite3_vtab_cursor *cur);
- void merge_sorted_lists(f32 *a, i64 *a_rowids, i64 a_length, f32 *b,
- i64 *b_rowids, i32 *b_top_idxs, i64 b_length, f32 *out,
- i64 *out_rowids, i64 out_length, i64 *out_used) {
- // assert((a_length >= out_length) || (b_length >= out_length));
- i64 ptrA = 0;
- i64 ptrB = 0;
- for (int i = 0; i < out_length; i++) {
- if ((ptrA >= a_length) && (ptrB >= b_length)) {
- *out_used = i;
- return;
- }
- if (ptrA >= a_length) {
- out[i] = b[b_top_idxs[ptrB]];
- out_rowids[i] = b_rowids[b_top_idxs[ptrB]];
- ptrB++;
- } else if (ptrB >= b_length) {
- out[i] = a[ptrA];
- out_rowids[i] = a_rowids[ptrA];
- ptrA++;
- } else {
- if (a[ptrA] <= b[b_top_idxs[ptrB]]) {
- out[i] = a[ptrA];
- out_rowids[i] = a_rowids[ptrA];
- ptrA++;
- } else {
- out[i] = b[b_top_idxs[ptrB]];
- out_rowids[i] = b_rowids[b_top_idxs[ptrB]];
- ptrB++;
- }
- }
- }
- *out_used = out_length;
- }
- u8 *bitmap_new(i32 n) {
- assert(n % 8 == 0);
- u8 *p = sqlite3_malloc(n * sizeof(u8) / CHAR_BIT);
- if (p) {
- memset(p, 0, n * sizeof(u8) / CHAR_BIT);
- }
- return p;
- }
- u8 *bitmap_new_from(i32 n, u8 *from) {
- assert(n % 8 == 0);
- u8 *p = sqlite3_malloc(n * sizeof(u8) / CHAR_BIT);
- if (p) {
- memcpy(p, from, n / CHAR_BIT);
- }
- return p;
- }
- void bitmap_copy(u8 *base, u8 *from, i32 n) {
- assert(n % 8 == 0);
- memcpy(base, from, n / CHAR_BIT);
- }
- void bitmap_and_inplace(u8 *base, u8 *other, i32 n) {
- assert((n % 8) == 0);
- for (int i = 0; i < n / CHAR_BIT; i++) {
- base[i] = base[i] & other[i];
- }
- }
- void bitmap_set(u8 *bitmap, i32 position, int value) {
- if (value) {
- bitmap[position / CHAR_BIT] |= 1 << (position % CHAR_BIT);
- } else {
- bitmap[position / CHAR_BIT] &= ~(1 << (position % CHAR_BIT));
- }
- }
- int bitmap_get(u8 *bitmap, i32 position) {
- return (((bitmap[position / CHAR_BIT]) >> (position % CHAR_BIT)) & 1);
- }
- void bitmap_clear(u8 *bitmap, i32 n) {
- assert((n % 8) == 0);
- memset(bitmap, 0, n / CHAR_BIT);
- }
- void bitmap_fill(u8 *bitmap, i32 n) {
- assert((n % 8) == 0);
- memset(bitmap, 0xFF, n / CHAR_BIT);
- }
- /**
- * @brief Finds the minimum k items in distances, and writes the indicies to
- * out.
- *
- * @param distances input f32 array of size n, the items to consider.
- * @param n: size of distances array.
- * @param out: Output array of size k, will contain at most k element indicies
- * @param k: Size of output array
- * @return int
- */
- int min_idx(const f32 *distances, i32 n, u8 *candidates, i32 *out, i32 k,
- u8 *bTaken, i32 *k_used) {
- assert(k > 0);
- assert(k <= n);
- bitmap_clear(bTaken, n);
- for (int ik = 0; ik < k; ik++) {
- int min_idx = 0;
- while (min_idx < n &&
- (bitmap_get(bTaken, min_idx) || !bitmap_get(candidates, min_idx))) {
- min_idx++;
- }
- if (min_idx >= n) {
- *k_used = ik;
- return SQLITE_OK;
- }
- for (int i = 0; i < n; i++) {
- if (distances[i] <= distances[min_idx] && !bitmap_get(bTaken, i) &&
- (bitmap_get(candidates, i))) {
- min_idx = i;
- }
- }
- out[ik] = min_idx;
- bitmap_set(bTaken, min_idx, 1);
- }
- *k_used = k;
- return SQLITE_OK;
- }
- int vec0_get_metadata_text_long_value(
- vec0_vtab * p,
- sqlite3_stmt ** stmt,
- int metadata_idx,
- i64 rowid,
- int *n,
- char ** s) {
- int rc;
- if(!(*stmt)) {
- const char * zSql = sqlite3_mprintf("select data from " VEC0_SHADOW_METADATA_TEXT_DATA_NAME " where rowid = ?", p->schemaName, p->tableName, metadata_idx);
- if(!zSql) {
- rc = SQLITE_NOMEM;
- goto done;
- }
- rc = sqlite3_prepare_v2(p->db, zSql, -1, stmt, NULL);
- sqlite3_free( (void *) zSql);
- if(rc != SQLITE_OK) {
- goto done;
- }
- }
- sqlite3_reset(*stmt);
- sqlite3_bind_int64(*stmt, 1, rowid);
- rc = sqlite3_step(*stmt);
- if(rc != SQLITE_ROW) {
- rc = SQLITE_ERROR;
- goto done;
- }
- *s = (char *) sqlite3_column_text(*stmt, 0);
- *n = sqlite3_column_bytes(*stmt, 0);
- rc = SQLITE_OK;
- done:
- return rc;
- }
- /**
- * @brief Crete at "iterator" (sqlite3_stmt) of chunks with the given constraints
- *
- * Any VEC0_IDXSTR_KIND_KNN_PARTITON_CONSTRAINT values in idxStr/argv will be applied
- * as WHERE constraints in the underlying stmt SQL, and any consumer of the stmt
- * can freely step through the stmt with all constraints satisfied.
- *
- * @param p - vec0_vtab
- * @param idxStr - the xBestIndex/xFilter idxstr containing VEC0_IDXSTR values
- * @param argc - number of argv values from xFilter
- * @param argv - array of sqlite3_value from xFilter
- * @param outStmt - output sqlite3_stmt of chunks with all filters applied
- * @return int SQLITE_OK on success, error code otherwise
- */
- int vec0_chunks_iter(vec0_vtab * p, const char * idxStr, int argc, sqlite3_value ** argv, sqlite3_stmt** outStmt) {
- // always null terminated, enforced by SQLite
- int idxStrLength = strlen(idxStr);
- // "1" refers to the initial vec0_query_plan char, 4 is the number of chars per "element"
- int numValueEntries = (idxStrLength-1) / 4;
- assert(argc == numValueEntries);
- int rc;
- sqlite3_str * s = sqlite3_str_new(NULL);
- sqlite3_str_appendf(s, "select chunk_id, validity, rowids "
- " from " VEC0_SHADOW_CHUNKS_NAME,
- p->schemaName, p->tableName);
- int appendedWhere = 0;
- for(int i = 0; i < numValueEntries; i++) {
- int idx = 1 + (i * 4);
- char kind = idxStr[idx + 0];
- if(kind != VEC0_IDXSTR_KIND_KNN_PARTITON_CONSTRAINT) {
- continue;
- }
- int partition_idx = idxStr[idx + 1] - 'A';
- int operator = idxStr[idx + 2];
- // idxStr[idx + 3] is just null, a '_' placeholder
- if(!appendedWhere) {
- sqlite3_str_appendall(s, " WHERE ");
- appendedWhere = 1;
- }else {
- sqlite3_str_appendall(s, " AND ");
- }
- switch(operator) {
- case VEC0_PARTITION_OPERATOR_EQ:
- sqlite3_str_appendf(s, " partition%02d = ? ", partition_idx);
- break;
- case VEC0_PARTITION_OPERATOR_GT:
- sqlite3_str_appendf(s, " partition%02d > ? ", partition_idx);
- break;
- case VEC0_PARTITION_OPERATOR_LE:
- sqlite3_str_appendf(s, " partition%02d <= ? ", partition_idx);
- break;
- case VEC0_PARTITION_OPERATOR_LT:
- sqlite3_str_appendf(s, " partition%02d < ? ", partition_idx);
- break;
- case VEC0_PARTITION_OPERATOR_GE:
- sqlite3_str_appendf(s, " partition%02d >= ? ", partition_idx);
- break;
- case VEC0_PARTITION_OPERATOR_NE:
- sqlite3_str_appendf(s, " partition%02d != ? ", partition_idx);
- break;
- default: {
- char * zSql = sqlite3_str_finish(s);
- sqlite3_free(zSql);
- return SQLITE_ERROR;
- }
- }
- }
- char *zSql = sqlite3_str_finish(s);
- if (!zSql) {
- return SQLITE_NOMEM;
- }
- rc = sqlite3_prepare_v2(p->db, zSql, -1, outStmt, NULL);
- sqlite3_free(zSql);
- if(rc != SQLITE_OK) {
- return rc;
- }
- int n = 1;
- for(int i = 0; i < numValueEntries; i++) {
- int idx = 1 + (i * 4);
- char kind = idxStr[idx + 0];
- if(kind != VEC0_IDXSTR_KIND_KNN_PARTITON_CONSTRAINT) {
- continue;
- }
- sqlite3_bind_value(*outStmt, n++, argv[i]);
- }
- return rc;
- }
- // A single `xxx in (...)` constraint on a metadata column. TEXT or INTEGER columns only for now.
- struct Vec0MetadataIn{
- // index into xFilter's argv[] of the value this constraint is on
- int argv_idx;
- // metadata column index of the constraint, derived from idxStr + argv_idx
- int metadata_idx;
- // copies of the `(...)` values from sqlite3_vtab_in_first()/sqlite3_vtab_in_next(); read as i64 elements for INTEGER columns and as struct Vec0MetadataInTextEntry elements for TEXT columns
- struct Array array;
- };
- // Element of Vec0MetadataIn.array for an `xxx in (...)` constraint on a TEXT column: basically just a string.
- struct Vec0MetadataInTextEntry {
- int n; // length of zString; compared against the stored text's byte length
- char * zString; // the candidate text; compared with strncmp()
- };
- int vec0_metadata_filter_text(vec0_vtab * p, sqlite3_value * value, const void * buffer, int size, vec0_metadata_operator op, u8* b, int metadata_idx, int chunk_rowid, struct Array * aMetadataIn, int argv_idx) {
- int rc;
- sqlite3_stmt * stmt = NULL;
- i64 * rowids = NULL;
- sqlite3_blob * rowidsBlob;
- const char * sTarget = (const char *) sqlite3_value_text(value);
- int nTarget = sqlite3_value_bytes(value);
- // TODO(perf): only text metadata news the rowids BLOB. Make it so that
- // rowids BLOB is re-used when multiple fitlers on text columns,
- // ex "name BETWEEN 'a' and 'b'""
- rc = sqlite3_blob_open(p->db, p->schemaName, p->shadowChunksName, "rowids", chunk_rowid, 0, &rowidsBlob);
- if(rc != SQLITE_OK) {
- return rc;
- }
- assert(sqlite3_blob_bytes(rowidsBlob) % sizeof(i64) == 0);
- assert((sqlite3_blob_bytes(rowidsBlob) / sizeof(i64)) == size);
- rowids = sqlite3_malloc(sqlite3_blob_bytes(rowidsBlob));
- if(!rowids) {
- sqlite3_blob_close(rowidsBlob);
- return SQLITE_NOMEM;
- }
- rc = sqlite3_blob_read(rowidsBlob, rowids, sqlite3_blob_bytes(rowidsBlob), 0);
- if(rc != SQLITE_OK) {
- sqlite3_blob_close(rowidsBlob);
- return rc;
- }
- sqlite3_blob_close(rowidsBlob);
- switch(op) {
- int nPrefix;
- char * sPrefix;
- char *sFull;
- int nFull;
- u8 * view;
- case VEC0_METADATA_OPERATOR_EQ: {
- for(int i = 0; i < size; i++) {
- view = &((u8*) buffer)[i * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH];
- nPrefix = ((int*) view)[0];
- sPrefix = (char *) &view[4];
- // for EQ the text lengths must match
- if(nPrefix != nTarget) {
- bitmap_set(b, i, 0);
- continue;
- }
- int cmpPrefix = strncmp(sPrefix, sTarget, min(nPrefix, VEC0_METADATA_TEXT_VIEW_DATA_LENGTH));
- // for short strings, use the prefix comparison direclty
- if(nPrefix <= VEC0_METADATA_TEXT_VIEW_DATA_LENGTH) {
- bitmap_set(b, i, cmpPrefix == 0);
- continue;
- }
- // for EQ on longs strings, the prefix must match
- if(cmpPrefix) {
- bitmap_set(b, i, 0);
- continue;
- }
- // consult the full string
- rc = vec0_get_metadata_text_long_value(p, &stmt, metadata_idx, rowids[i], &nFull, &sFull);
- if(rc != SQLITE_OK) {
- goto done;
- }
- if(nPrefix != nFull) {
- rc = SQLITE_ERROR;
- goto done;
- }
- bitmap_set(b, i, strncmp(sFull, sTarget, nFull) == 0);
- }
- break;
- }
- case VEC0_METADATA_OPERATOR_NE: {
- for(int i = 0; i < size; i++) {
- view = &((u8*) buffer)[i * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH];
- nPrefix = ((int*) view)[0];
- sPrefix = (char *) &view[4];
- // for NE if text lengths dont match, it never will
- if(nPrefix != nTarget) {
- bitmap_set(b, i, 1);
- continue;
- }
- int cmpPrefix = strncmp(sPrefix, sTarget, min(nPrefix, VEC0_METADATA_TEXT_VIEW_DATA_LENGTH));
- // for short strings, use the prefix comparison direclty
- if(nPrefix <= VEC0_METADATA_TEXT_VIEW_DATA_LENGTH) {
- bitmap_set(b, i, cmpPrefix != 0);
- continue;
- }
- // for NE on longs strings, if prefixes dont match, then long string wont
- if(cmpPrefix) {
- bitmap_set(b, i, 1);
- continue;
- }
- // consult the full string
- rc = vec0_get_metadata_text_long_value(p, &stmt, metadata_idx, rowids[i], &nFull, &sFull);
- if(rc != SQLITE_OK) {
- goto done;
- }
- if(nPrefix != nFull) {
- rc = SQLITE_ERROR;
- goto done;
- }
- bitmap_set(b, i, strncmp(sFull, sTarget, nFull) != 0);
- }
- break;
- }
- case VEC0_METADATA_OPERATOR_GT: {
- for(int i = 0; i < size; i++) {
- view = &((u8*) buffer)[i * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH];
- nPrefix = ((int*) view)[0];
- sPrefix = (char *) &view[4];
- int cmpPrefix = strncmp(sPrefix, sTarget, min(min(nPrefix, VEC0_METADATA_TEXT_VIEW_DATA_LENGTH), nTarget));
- if(nPrefix < VEC0_METADATA_TEXT_VIEW_DATA_LENGTH) {
- // if prefix match, check which is longer
- if(cmpPrefix == 0) {
- bitmap_set(b, i, nPrefix > nTarget);
- }
- else {
- bitmap_set(b, i, cmpPrefix > 0);
- }
- continue;
- }
- // TODO(perf): may not need to compare full text in some cases
- rc = vec0_get_metadata_text_long_value(p, &stmt, metadata_idx, rowids[i], &nFull, &sFull);
- if(rc != SQLITE_OK) {
- goto done;
- }
- if(nPrefix != nFull) {
- rc = SQLITE_ERROR;
- goto done;
- }
- bitmap_set(b, i, strncmp(sFull, sTarget, nFull) > 0);
- }
- break;
- }
- case VEC0_METADATA_OPERATOR_GE: {
- for(int i = 0; i < size; i++) {
- view = &((u8*) buffer)[i * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH];
- nPrefix = ((int*) view)[0];
- sPrefix = (char *) &view[4];
- int cmpPrefix = strncmp(sPrefix, sTarget, min(min(nPrefix, VEC0_METADATA_TEXT_VIEW_DATA_LENGTH), nTarget));
- if(nPrefix < VEC0_METADATA_TEXT_VIEW_DATA_LENGTH) {
- // if prefix match, check which is longer
- if(cmpPrefix == 0) {
- bitmap_set(b, i, nPrefix >= nTarget);
- }
- else {
- bitmap_set(b, i, cmpPrefix >= 0);
- }
- continue;
- }
- // TODO(perf): may not need to compare full text in some cases
- rc = vec0_get_metadata_text_long_value(p, &stmt, metadata_idx, rowids[i], &nFull, &sFull);
- if(rc != SQLITE_OK) {
- goto done;
- }
- if(nPrefix != nFull) {
- rc = SQLITE_ERROR;
- goto done;
- }
- bitmap_set(b, i, strncmp(sFull, sTarget, nFull) >= 0);
- }
- break;
- }
- case VEC0_METADATA_OPERATOR_LE: {
- for(int i = 0; i < size; i++) {
- view = &((u8*) buffer)[i * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH];
- nPrefix = ((int*) view)[0];
- sPrefix = (char *) &view[4];
- int cmpPrefix = strncmp(sPrefix, sTarget, min(min(nPrefix, VEC0_METADATA_TEXT_VIEW_DATA_LENGTH), nTarget));
- if(nPrefix < VEC0_METADATA_TEXT_VIEW_DATA_LENGTH) {
- // if prefix match, check which is longer
- if(cmpPrefix == 0) {
- bitmap_set(b, i, nPrefix <= nTarget);
- }
- else {
- bitmap_set(b, i, cmpPrefix <= 0);
- }
- continue;
- }
- // TODO(perf): may not need to compare full text in some cases
- rc = vec0_get_metadata_text_long_value(p, &stmt, metadata_idx, rowids[i], &nFull, &sFull);
- if(rc != SQLITE_OK) {
- goto done;
- }
- if(nPrefix != nFull) {
- rc = SQLITE_ERROR;
- goto done;
- }
- bitmap_set(b, i, strncmp(sFull, sTarget, nFull) <= 0);
- }
- break;
- }
- case VEC0_METADATA_OPERATOR_LT: {
- for(int i = 0; i < size; i++) {
- view = &((u8*) buffer)[i * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH];
- nPrefix = ((int*) view)[0];
- sPrefix = (char *) &view[4];
- int cmpPrefix = strncmp(sPrefix, sTarget, min(min(nPrefix, VEC0_METADATA_TEXT_VIEW_DATA_LENGTH), nTarget));
- if(nPrefix < VEC0_METADATA_TEXT_VIEW_DATA_LENGTH) {
- // if prefix match, check which is longer
- if(cmpPrefix == 0) {
- bitmap_set(b, i, nPrefix < nTarget);
- }
- else {
- bitmap_set(b, i, cmpPrefix < 0);
- }
- continue;
- }
- // TODO(perf): may not need to compare full text in some cases
- rc = vec0_get_metadata_text_long_value(p, &stmt, metadata_idx, rowids[i], &nFull, &sFull);
- if(rc != SQLITE_OK) {
- goto done;
- }
- if(nPrefix != nFull) {
- rc = SQLITE_ERROR;
- goto done;
- }
- bitmap_set(b, i, strncmp(sFull, sTarget, nFull) < 0);
- }
- break;
- }
- case VEC0_METADATA_OPERATOR_IN: {
- size_t metadataInIdx = -1;
- for(size_t i = 0; i < aMetadataIn->length; i++) {
- struct Vec0MetadataIn * metadataIn = &(((struct Vec0MetadataIn *) aMetadataIn->z)[i]);
- if(metadataIn->argv_idx == argv_idx) {
- metadataInIdx = i;
- break;
- }
- }
- if(metadataInIdx < 0) {
- rc = SQLITE_ERROR;
- goto done;
- }
- struct Vec0MetadataIn * metadataIn = &((struct Vec0MetadataIn *) aMetadataIn->z)[metadataInIdx];
- struct Array * aTarget = &(metadataIn->array);
- int nPrefix;
- char * sPrefix;
- char *sFull;
- int nFull;
- u8 * view;
- for(int i = 0; i < size; i++) {
- view = &((u8*) buffer)[i * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH];
- nPrefix = ((int*) view)[0];
- sPrefix = (char *) &view[4];
- for(size_t target_idx = 0; target_idx < aTarget->length; target_idx++) {
- struct Vec0MetadataInTextEntry * entry = &(((struct Vec0MetadataInTextEntry*)aTarget->z)[target_idx]);
- if(entry->n != nPrefix) {
- continue;
- }
- int cmpPrefix = strncmp(sPrefix, entry->zString, min(nPrefix, VEC0_METADATA_TEXT_VIEW_DATA_LENGTH));
- if(nPrefix <= VEC0_METADATA_TEXT_VIEW_DATA_LENGTH) {
- if(cmpPrefix == 0) {
- bitmap_set(b, i, 1);
- break;
- }
- continue;
- }
- if(cmpPrefix) {
- continue;
- }
- rc = vec0_get_metadata_text_long_value(p, &stmt, metadata_idx, rowids[i], &nFull, &sFull);
- if(rc != SQLITE_OK) {
- goto done;
- }
- if(nPrefix != nFull) {
- rc = SQLITE_ERROR;
- goto done;
- }
- if(strncmp(sFull, entry->zString, nFull) == 0) {
- bitmap_set(b, i, 1);
- break;
- }
- }
- }
- break;
- }
- }
- rc = SQLITE_OK;
- done:
- sqlite3_finalize(stmt);
- sqlite3_free(rowids);
- return rc;
- }
- /**
- * @brief Fill in bitmap of chunk values, whether or not the values match a metadata constraint
- *
- * @param p vec0_vtab
- * @param metadata_idx index of the metatadata column to perfrom constraints on
- * @param value sqlite3_value of the constraints value
- * @param blob sqlite3_blob that is already opened on the metdata column's shadow chunk table
- * @param chunk_rowid rowid of the chunk to calculate on
- * @param b pre-allocated and zero'd out bitmap to write results to
- * @param size size of the chunk
- * @return int SQLITE_OK on success, error code otherwise
- */
- int vec0_set_metadata_filter_bitmap(
- vec0_vtab *p,
- int metadata_idx,
- vec0_metadata_operator op,
- sqlite3_value * value,
- sqlite3_blob * blob,
- i64 chunk_rowid,
- u8* b,
- int size,
- struct Array * aMetadataIn, int argv_idx) {
- // TODO: shouldn't this skip in-valid entries from the chunk's validity bitmap?
- int rc;
- rc = sqlite3_blob_reopen(blob, chunk_rowid);
- if(rc != SQLITE_OK) {
- return rc;
- }
- vec0_metadata_column_kind kind = p->metadata_columns[metadata_idx].kind;
- int szMatch = 0;
- int blobSize = sqlite3_blob_bytes(blob);
- switch(kind) {
- case VEC0_METADATA_COLUMN_KIND_BOOLEAN: {
- szMatch = blobSize == size / CHAR_BIT;
- break;
- }
- case VEC0_METADATA_COLUMN_KIND_INTEGER: {
- szMatch = blobSize == size * sizeof(i64);
- break;
- }
- case VEC0_METADATA_COLUMN_KIND_FLOAT: {
- szMatch = blobSize == size * sizeof(double);
- break;
- }
- case VEC0_METADATA_COLUMN_KIND_TEXT: {
- szMatch = blobSize == size * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH;
- break;
- }
- }
- if(!szMatch) {
- return SQLITE_ERROR;
- }
- void * buffer = sqlite3_malloc(blobSize);
- if(!buffer) {
- return SQLITE_NOMEM;
- }
- rc = sqlite3_blob_read(blob, buffer, blobSize, 0);
- if(rc != SQLITE_OK) {
- goto done;
- }
- switch(kind) {
- case VEC0_METADATA_COLUMN_KIND_BOOLEAN: {
- int target = sqlite3_value_int(value);
- if( (target && op == VEC0_METADATA_OPERATOR_EQ) || (!target && op == VEC0_METADATA_OPERATOR_NE)) {
- for(int i = 0; i < size; i++) { bitmap_set(b, i, bitmap_get((u8*) buffer, i)); }
- }
- else {
- for(int i = 0; i < size; i++) { bitmap_set(b, i, !bitmap_get((u8*) buffer, i)); }
- }
- break;
- }
- case VEC0_METADATA_COLUMN_KIND_INTEGER: {
- i64 * array = (i64*) buffer;
- i64 target = sqlite3_value_int64(value);
- switch(op) {
- case VEC0_METADATA_OPERATOR_EQ: {
- for(int i = 0; i < size; i++) { bitmap_set(b, i, array[i] == target); }
- break;
- }
- case VEC0_METADATA_OPERATOR_GT: {
- for(int i = 0; i < size; i++) { bitmap_set(b, i, array[i] > target); }
- break;
- }
- case VEC0_METADATA_OPERATOR_LE: {
- for(int i = 0; i < size; i++) { bitmap_set(b, i, array[i] <= target); }
- break;
- }
- case VEC0_METADATA_OPERATOR_LT: {
- for(int i = 0; i < size; i++) { bitmap_set(b, i, array[i] < target); }
- break;
- }
- case VEC0_METADATA_OPERATOR_GE: {
- for(int i = 0; i < size; i++) { bitmap_set(b, i, array[i] >= target); }
- break;
- }
- case VEC0_METADATA_OPERATOR_NE: {
- for(int i = 0; i < size; i++) { bitmap_set(b, i, array[i] != target); }
- break;
- }
- case VEC0_METADATA_OPERATOR_IN: {
- int metadataInIdx = -1;
- for(size_t i = 0; i < aMetadataIn->length; i++) {
- struct Vec0MetadataIn * metadataIn = &((struct Vec0MetadataIn *) aMetadataIn->z)[i];
- if(metadataIn->argv_idx == argv_idx) {
- metadataInIdx = i;
- break;
- }
- }
- if(metadataInIdx < 0) {
- rc = SQLITE_ERROR;
- goto done;
- }
- struct Vec0MetadataIn * metadataIn = &((struct Vec0MetadataIn *) aMetadataIn->z)[metadataInIdx];
- struct Array * aTarget = &(metadataIn->array);
- for(int i = 0; i < size; i++) {
- for(size_t target_idx = 0; target_idx < aTarget->length; target_idx++) {
- if( ((i64*)aTarget->z)[target_idx] == array[i]) {
- bitmap_set(b, i, 1);
- break;
- }
- }
- }
- break;
- }
- }
- break;
- }
- case VEC0_METADATA_COLUMN_KIND_FLOAT: {
- double * array = (double*) buffer;
- double target = sqlite3_value_double(value);
- switch(op) {
- case VEC0_METADATA_OPERATOR_EQ: {
- for(int i = 0; i < size; i++) { bitmap_set(b, i, array[i] == target); }
- break;
- }
- case VEC0_METADATA_OPERATOR_GT: {
- for(int i = 0; i < size; i++) { bitmap_set(b, i, array[i] > target); }
- break;
- }
- case VEC0_METADATA_OPERATOR_LE: {
- for(int i = 0; i < size; i++) { bitmap_set(b, i, array[i] <= target); }
- break;
- }
- case VEC0_METADATA_OPERATOR_LT: {
- for(int i = 0; i < size; i++) { bitmap_set(b, i, array[i] < target); }
- break;
- }
- case VEC0_METADATA_OPERATOR_GE: {
- for(int i = 0; i < size; i++) { bitmap_set(b, i, array[i] >= target); }
- break;
- }
- case VEC0_METADATA_OPERATOR_NE: {
- for(int i = 0; i < size; i++) { bitmap_set(b, i, array[i] != target); }
- break;
- }
- case VEC0_METADATA_OPERATOR_IN: {
- // should never be reached
- break;
- }
- }
- break;
- }
- case VEC0_METADATA_COLUMN_KIND_TEXT: {
- rc = vec0_metadata_filter_text(p, value, buffer, size, op, b, metadata_idx, chunk_rowid, aMetadataIn, argv_idx);
- if(rc != SQLITE_OK) {
- goto done;
- }
- break;
- }
- }
- done:
- sqlite3_free(buffer);
- return rc;
- }
- int vec0Filter_knn_chunks_iter(vec0_vtab *p, sqlite3_stmt *stmtChunks,
- struct VectorColumnDefinition *vector_column,
- int vectorColumnIdx, struct Array *arrayRowidsIn,
- struct Array * aMetadataIn,
- const char * idxStr, int argc, sqlite3_value ** argv,
- void *queryVector, i64 k, i64 **out_topk_rowids,
- f32 **out_topk_distances, i64 *out_used) {
- // for each chunk, get top min(k, chunk_size) rowid + distances to query vec.
- // then reconcile all topk_chunks for a true top k.
- // output only rowids + distances for now
- int rc = SQLITE_OK;
- sqlite3_blob *blobVectors = NULL;
- void *baseVectors = NULL; // memory: chunk_size * dimensions * element_size
- // OWNED BY CALLER ON SUCCESS
- i64 *topk_rowids = NULL; // memory: k * 4
- // OWNED BY CALLER ON SUCCESS
- f32 *topk_distances = NULL; // memory: k * 4
- i64 *tmp_topk_rowids = NULL; // memory: k * 4
- f32 *tmp_topk_distances = NULL; // memory: k * 4
- f32 *chunk_distances = NULL; // memory: chunk_size * 4
- u8 *b = NULL; // memory: chunk_size / 8
- u8 *bTaken = NULL; // memory: chunk_size / 8
- i32 *chunk_topk_idxs = NULL; // memory: k * 4
- u8 *bmRowids = NULL; // memory: chunk_size / 8
- u8 *bmMetadata = NULL; // memory: chunk_size / 8
- // // total: a lot???
- // 6 * (k * 4) + (k * 2) + (chunk_size / 8) + (chunk_size * dimensions * 4)
- topk_rowids = sqlite3_malloc(k * sizeof(i64));
- if (!topk_rowids) {
- rc = SQLITE_NOMEM;
- goto cleanup;
- }
- memset(topk_rowids, 0, k * sizeof(i64));
- topk_distances = sqlite3_malloc(k * sizeof(f32));
- if (!topk_distances) {
- rc = SQLITE_NOMEM;
- goto cleanup;
- }
- memset(topk_distances, 0, k * sizeof(f32));
- tmp_topk_rowids = sqlite3_malloc(k * sizeof(i64));
- if (!tmp_topk_rowids) {
- rc = SQLITE_NOMEM;
- goto cleanup;
- }
- memset(tmp_topk_rowids, 0, k * sizeof(i64));
- tmp_topk_distances = sqlite3_malloc(k * sizeof(f32));
- if (!tmp_topk_distances) {
- rc = SQLITE_NOMEM;
- goto cleanup;
- }
- memset(tmp_topk_distances, 0, k * sizeof(f32));
- i64 k_used = 0;
- i64 baseVectorsSize = p->chunk_size * vector_column_byte_size(*vector_column);
- baseVectors = sqlite3_malloc(baseVectorsSize);
- if (!baseVectors) {
- rc = SQLITE_NOMEM;
- goto cleanup;
- }
- chunk_distances = sqlite3_malloc(p->chunk_size * sizeof(f32));
- if (!chunk_distances) {
- rc = SQLITE_NOMEM;
- goto cleanup;
- }
- b = bitmap_new(p->chunk_size);
- if (!b) {
- rc = SQLITE_NOMEM;
- goto cleanup;
- }
- bTaken = bitmap_new(p->chunk_size);
- if (!bTaken) {
- rc = SQLITE_NOMEM;
- goto cleanup;
- }
- chunk_topk_idxs = sqlite3_malloc(k * sizeof(i32));
- if (!chunk_topk_idxs) {
- rc = SQLITE_NOMEM;
- goto cleanup;
- }
- bmRowids = arrayRowidsIn ? bitmap_new(p->chunk_size) : NULL;
- if (arrayRowidsIn && !bmRowids) {
- rc = SQLITE_NOMEM;
- goto cleanup;
- }
- sqlite3_blob * metadataBlobs[VEC0_MAX_METADATA_COLUMNS];
- memset(metadataBlobs, 0, sizeof(sqlite3_blob*) * VEC0_MAX_METADATA_COLUMNS);
- bmMetadata = bitmap_new(p->chunk_size);
- if(!bmMetadata) {
- rc = SQLITE_NOMEM;
- goto cleanup;
- }
- int idxStrLength = strlen(idxStr);
- int numValueEntries = (idxStrLength-1) / 4;
- assert(numValueEntries == argc);
- int hasMetadataFilters = 0;
- for(int i = 0; i < argc; i++) {
- int idx = 1 + (i * 4);
- char kind = idxStr[idx + 0];
- if(kind == VEC0_IDXSTR_KIND_METADATA_CONSTRAINT) {
- hasMetadataFilters = 1;
- break;
- }
- }
- while (true) {
- rc = sqlite3_step(stmtChunks);
- if (rc == SQLITE_DONE) {
- break;
- }
- if (rc != SQLITE_ROW) {
- vtab_set_error(&p->base, "chunks iter error");
- rc = SQLITE_ERROR;
- goto cleanup;
- }
- memset(chunk_distances, 0, p->chunk_size * sizeof(f32));
- memset(chunk_topk_idxs, 0, k * sizeof(i32));
- bitmap_clear(b, p->chunk_size);
- i64 chunk_id = sqlite3_column_int64(stmtChunks, 0);
- unsigned char *chunkValidity =
- (unsigned char *)sqlite3_column_blob(stmtChunks, 1);
- i64 validitySize = sqlite3_column_bytes(stmtChunks, 1);
- if (validitySize != p->chunk_size / CHAR_BIT) {
- // IMP: V05271_22109
- vtab_set_error(
- &p->base,
- "chunk validity size doesn't match - expected %lld, found %lld",
- p->chunk_size / CHAR_BIT, validitySize);
- rc = SQLITE_ERROR;
- goto cleanup;
- }
- i64 *chunkRowids = (i64 *)sqlite3_column_blob(stmtChunks, 2);
- i64 rowidsSize = sqlite3_column_bytes(stmtChunks, 2);
- if (rowidsSize != p->chunk_size * sizeof(i64)) {
- // IMP: V02796_19635
- vtab_set_error(&p->base, "rowids size doesn't match");
- vtab_set_error(
- &p->base,
- "chunk rowids size doesn't match - expected %lld, found %lld",
- p->chunk_size * sizeof(i64), rowidsSize);
- rc = SQLITE_ERROR;
- goto cleanup;
- }
- // open the vector chunk blob for the current chunk
- rc = sqlite3_blob_open(p->db, p->schemaName,
- p->shadowVectorChunksNames[vectorColumnIdx],
- "vectors", chunk_id, 0, &blobVectors);
- if (rc != SQLITE_OK) {
- vtab_set_error(&p->base, "could not open vectors blob for chunk %lld",
- chunk_id);
- rc = SQLITE_ERROR;
- goto cleanup;
- }
- i64 currentBaseVectorsSize = sqlite3_blob_bytes(blobVectors);
- i64 expectedBaseVectorsSize =
- p->chunk_size * vector_column_byte_size(*vector_column);
- if (currentBaseVectorsSize != expectedBaseVectorsSize) {
- // IMP: V16465_00535
- vtab_set_error(
- &p->base,
- "vectors blob size doesn't match - expected %lld, found %lld",
- expectedBaseVectorsSize, currentBaseVectorsSize);
- rc = SQLITE_ERROR;
- goto cleanup;
- }
- rc = sqlite3_blob_read(blobVectors, baseVectors, currentBaseVectorsSize, 0);
- if (rc != SQLITE_OK) {
- vtab_set_error(&p->base, "vectors blob read error for %lld", chunk_id);
- rc = SQLITE_ERROR;
- goto cleanup;
- }
- bitmap_copy(b, chunkValidity, p->chunk_size);
- if (arrayRowidsIn) {
- bitmap_clear(bmRowids, p->chunk_size);
- for (int i = 0; i < p->chunk_size; i++) {
- if (!bitmap_get(chunkValidity, i)) {
- continue;
- }
- i64 rowid = chunkRowids[i];
- void *in = bsearch(&rowid, arrayRowidsIn->z, arrayRowidsIn->length,
- sizeof(i64), _cmp);
- bitmap_set(bmRowids, i, in ? 1 : 0);
- }
- bitmap_and_inplace(b, bmRowids, p->chunk_size);
- }
- if(hasMetadataFilters) {
- for(int i = 0; i < argc; i++) {
- int idx = 1 + (i * 4);
- char kind = idxStr[idx + 0];
- if(kind != VEC0_IDXSTR_KIND_METADATA_CONSTRAINT) {
- continue;
- }
- int metadata_idx = idxStr[idx + 1] - 'A';
- int operator = idxStr[idx + 2];
- if(!metadataBlobs[metadata_idx]) {
- rc = sqlite3_blob_open(p->db, p->schemaName, p->shadowMetadataChunksNames[metadata_idx], "data", chunk_id, 0, &metadataBlobs[metadata_idx]);
- vtab_set_error(&p->base, "Could not open metadata blob");
- if(rc != SQLITE_OK) {
- goto cleanup;
- }
- }
- bitmap_clear(bmMetadata, p->chunk_size);
- rc = vec0_set_metadata_filter_bitmap(p, metadata_idx, operator, argv[i], metadataBlobs[metadata_idx], chunk_id, bmMetadata, p->chunk_size, aMetadataIn, i);
- if(rc != SQLITE_OK) {
- vtab_set_error(&p->base, "Could not filter metadata fields");
- if(rc != SQLITE_OK) {
- goto cleanup;
- }
- }
- bitmap_and_inplace(b, bmMetadata, p->chunk_size);
- }
- }
- for (int i = 0; i < p->chunk_size; i++) {
- if (!bitmap_get(b, i)) {
- continue;
- };
- f32 result;
- switch (vector_column->element_type) {
- case SQLITE_VEC_ELEMENT_TYPE_FLOAT32: {
- const f32 *base_i =
- ((f32 *)baseVectors) + (i * vector_column->dimensions);
- switch (vector_column->distance_metric) {
- case VEC0_DISTANCE_METRIC_L2: {
- result = distance_l2_sqr_float(base_i, (f32 *)queryVector,
- &vector_column->dimensions);
- break;
- }
- case VEC0_DISTANCE_METRIC_L1: {
- result = distance_l1_f32(base_i, (f32 *)queryVector,
- &vector_column->dimensions);
- break;
- }
- case VEC0_DISTANCE_METRIC_COSINE: {
- result = distance_cosine_float(base_i, (f32 *)queryVector,
- &vector_column->dimensions);
- break;
- }
- }
- break;
- }
- case SQLITE_VEC_ELEMENT_TYPE_INT8: {
- const i8 *base_i =
- ((i8 *)baseVectors) + (i * vector_column->dimensions);
- switch (vector_column->distance_metric) {
- case VEC0_DISTANCE_METRIC_L2: {
- result = distance_l2_sqr_int8(base_i, (i8 *)queryVector,
- &vector_column->dimensions);
- break;
- }
- case VEC0_DISTANCE_METRIC_L1: {
- result = distance_l1_int8(base_i, (i8 *)queryVector,
- &vector_column->dimensions);
- break;
- }
- case VEC0_DISTANCE_METRIC_COSINE: {
- result = distance_cosine_int8(base_i, (i8 *)queryVector,
- &vector_column->dimensions);
- break;
- }
- }
- break;
- }
- case SQLITE_VEC_ELEMENT_TYPE_BIT: {
- const u8 *base_i =
- ((u8 *)baseVectors) + (i * (vector_column->dimensions / CHAR_BIT));
- result = distance_hamming(base_i, (u8 *)queryVector,
- &vector_column->dimensions);
- break;
- }
- }
- chunk_distances[i] = result;
- }
- int used1;
- min_idx(chunk_distances, p->chunk_size, b, chunk_topk_idxs,
- min(k, p->chunk_size), bTaken, &used1);
- i64 used;
- merge_sorted_lists(topk_distances, topk_rowids, k_used, chunk_distances,
- chunkRowids, chunk_topk_idxs,
- min(min(k, p->chunk_size), used1), tmp_topk_distances,
- tmp_topk_rowids, k, &used);
- for (int i = 0; i < used; i++) {
- topk_rowids[i] = tmp_topk_rowids[i];
- topk_distances[i] = tmp_topk_distances[i];
- }
- k_used = used;
- // blobVectors is always opened with read-only permissions, so this never
- // fails.
- sqlite3_blob_close(blobVectors);
- blobVectors = NULL;
- }
- *out_topk_rowids = topk_rowids;
- *out_topk_distances = topk_distances;
- *out_used = k_used;
- rc = SQLITE_OK;
- cleanup:
- if (rc != SQLITE_OK) {
- sqlite3_free(topk_rowids);
- sqlite3_free(topk_distances);
- }
- sqlite3_free(chunk_topk_idxs);
- sqlite3_free(tmp_topk_rowids);
- sqlite3_free(tmp_topk_distances);
- sqlite3_free(b);
- sqlite3_free(bTaken);
- sqlite3_free(bmRowids);
- sqlite3_free(baseVectors);
- sqlite3_free(chunk_distances);
- sqlite3_free(bmMetadata);
- for(int i = 0; i < VEC0_MAX_METADATA_COLUMNS; i++) {
- sqlite3_blob_close(metadataBlobs[i]);
- }
- // blobVectors is always opened with read-only permissions, so this never
- // fails.
- sqlite3_blob_close(blobVectors);
- return rc;
- }
- int vec0Filter_knn(vec0_cursor *pCur, vec0_vtab *p, int idxNum,
- const char *idxStr, int argc, sqlite3_value **argv) {
- assert(argc == (strlen(idxStr)-1) / 4);
- int rc;
- struct vec0_query_knn_data *knn_data;
- int vectorColumnIdx = idxNum;
- struct VectorColumnDefinition *vector_column =
- &p->vector_columns[vectorColumnIdx];
- struct Array *arrayRowidsIn = NULL;
- sqlite3_stmt *stmtChunks = NULL;
- void *queryVector;
- size_t dimensions;
- enum VectorElementType elementType;
- vector_cleanup queryVectorCleanup = vector_cleanup_noop;
- char *pzError;
- knn_data = sqlite3_malloc(sizeof(*knn_data));
- if (!knn_data) {
- return SQLITE_NOMEM;
- }
- memset(knn_data, 0, sizeof(*knn_data));
- // array of `struct Vec0MetadataIn`, IF there are any `xxx in (...)` metadata constraints
- struct Array * aMetadataIn = NULL;
- int query_idx =-1;
- int k_idx = -1;
- int rowid_in_idx = -1;
- for(int i = 0; i < argc; i++) {
- if(idxStr[1 + (i*4)] == VEC0_IDXSTR_KIND_KNN_MATCH) {
- query_idx = i;
- }
- if(idxStr[1 + (i*4)] == VEC0_IDXSTR_KIND_KNN_K) {
- k_idx = i;
- }
- if(idxStr[1 + (i*4)] == VEC0_IDXSTR_KIND_KNN_ROWID_IN) {
- rowid_in_idx = i;
- }
- }
- assert(query_idx >= 0);
- assert(k_idx >= 0);
- // make sure the query vector matches the vector column (type dimensions etc.)
- rc = vector_from_value(argv[query_idx], &queryVector, &dimensions, &elementType,
- &queryVectorCleanup, &pzError);
- if (rc != SQLITE_OK) {
- vtab_set_error(&p->base,
- "Query vector on the \"%.*s\" column is invalid: %z",
- vector_column->name_length, vector_column->name, pzError);
- rc = SQLITE_ERROR;
- goto cleanup;
- }
- if (elementType != vector_column->element_type) {
- vtab_set_error(
- &p->base,
- "Query vector for the \"%.*s\" column is expected to be of type "
- "%s, but a %s vector was provided.",
- vector_column->name_length, vector_column->name,
- vector_subtype_name(vector_column->element_type),
- vector_subtype_name(elementType));
- rc = SQLITE_ERROR;
- goto cleanup;
- }
- if (dimensions != vector_column->dimensions) {
- vtab_set_error(
- &p->base,
- "Dimension mismatch for query vector for the \"%.*s\" column. "
- "Expected %d dimensions but received %d.",
- vector_column->name_length, vector_column->name,
- vector_column->dimensions, dimensions);
- rc = SQLITE_ERROR;
- goto cleanup;
- }
- i64 k = sqlite3_value_int64(argv[k_idx]);
- if (k < 0) {
- vtab_set_error(
- &p->base, "k value in knn queries must be greater than or equal to 0.");
- rc = SQLITE_ERROR;
- goto cleanup;
- }
- #define SQLITE_VEC_VEC0_K_MAX 4096
- if (k > SQLITE_VEC_VEC0_K_MAX) {
- vtab_set_error(
- &p->base,
- "k value in knn query too large, provided %lld and the limit is %lld",
- k, SQLITE_VEC_VEC0_K_MAX);
- rc = SQLITE_ERROR;
- goto cleanup;
- }
- if (k == 0) {
- knn_data->k = 0;
- pCur->knn_data = knn_data;
- pCur->query_plan = VEC0_QUERY_PLAN_KNN;
- rc = SQLITE_OK;
- goto cleanup;
- }
- // handle when a `rowid in (...)` operation was provided
- // Array of all the rowids that appear in any `rowid in (...)` constraint.
- // NULL if none were provided, which means a "full" scan.
- #if COMPILER_SUPPORTS_VTAB_IN
- if (rowid_in_idx >= 0) {
- sqlite3_value *item;
- int rc;
- arrayRowidsIn = sqlite3_malloc(sizeof(*arrayRowidsIn));
- if (!arrayRowidsIn) {
- rc = SQLITE_NOMEM;
- goto cleanup;
- }
- memset(arrayRowidsIn, 0, sizeof(*arrayRowidsIn));
- rc = array_init(arrayRowidsIn, sizeof(i64), 32);
- if (rc != SQLITE_OK) {
- goto cleanup;
- }
- for (rc = sqlite3_vtab_in_first(argv[rowid_in_idx], &item); rc == SQLITE_OK && item;
- rc = sqlite3_vtab_in_next(argv[rowid_in_idx], &item)) {
- i64 rowid;
- if (p->pkIsText) {
- rc = vec0_rowid_from_id(p, item, &rowid);
- if (rc != SQLITE_OK) {
- goto cleanup;
- }
- } else {
- rowid = sqlite3_value_int64(item);
- }
- rc = array_append(arrayRowidsIn, &rowid);
- if (rc != SQLITE_OK) {
- goto cleanup;
- }
- }
- if (rc != SQLITE_DONE) {
- vtab_set_error(&p->base, "error processing rowid in (...) array");
- goto cleanup;
- }
- qsort(arrayRowidsIn->z, arrayRowidsIn->length, arrayRowidsIn->element_size,
- _cmp);
- }
- #endif
- #if COMPILER_SUPPORTS_VTAB_IN
- for(int i = 0; i < argc; i++) {
- if(!(idxStr[1 + (i*4)] == VEC0_IDXSTR_KIND_METADATA_CONSTRAINT && idxStr[1 + (i*4) + 2] == VEC0_METADATA_OPERATOR_IN)) {
- continue;
- }
- int metadata_idx = idxStr[1 + (i*4) + 1] - 'A';
- if(!aMetadataIn) {
- aMetadataIn = sqlite3_malloc(sizeof(*aMetadataIn));
- if(!aMetadataIn) {
- rc = SQLITE_NOMEM;
- goto cleanup;
- }
- memset(aMetadataIn, 0, sizeof(*aMetadataIn));
- rc = array_init(aMetadataIn, sizeof(struct Vec0MetadataIn), 8);
- if(rc != SQLITE_OK) {
- goto cleanup;
- }
- }
- struct Vec0MetadataIn item;
- memset(&item, 0, sizeof(item));
- item.metadata_idx=metadata_idx;
- item.argv_idx = i;
- switch(p->metadata_columns[metadata_idx].kind) {
- case VEC0_METADATA_COLUMN_KIND_INTEGER: {
- rc = array_init(&item.array, sizeof(i64), 16);
- if(rc != SQLITE_OK) {
- goto cleanup;
- }
- sqlite3_value *entry;
- for (rc = sqlite3_vtab_in_first(argv[i], &entry); rc == SQLITE_OK && entry; rc = sqlite3_vtab_in_next(argv[i], &entry)) {
- i64 v = sqlite3_value_int64(entry);
- rc = array_append(&item.array, &v);
- if (rc != SQLITE_OK) {
- goto cleanup;
- }
- }
- if (rc != SQLITE_DONE) {
- vtab_set_error(&p->base, "Error fetching next value in `x in (...)` integer expression");
- goto cleanup;
- }
- break;
- }
- case VEC0_METADATA_COLUMN_KIND_TEXT: {
- rc = array_init(&item.array, sizeof(struct Vec0MetadataInTextEntry), 16);
- if(rc != SQLITE_OK) {
- goto cleanup;
- }
- sqlite3_value *entry;
- for (rc = sqlite3_vtab_in_first(argv[i], &entry); rc == SQLITE_OK && entry; rc = sqlite3_vtab_in_next(argv[i], &entry)) {
- const char * s = (const char *) sqlite3_value_text(entry);
- int n = sqlite3_value_bytes(entry);
- struct Vec0MetadataInTextEntry entry;
- entry.zString = sqlite3_mprintf("%.*s", n, s);
- if(!entry.zString) {
- rc = SQLITE_NOMEM;
- goto cleanup;
- }
- entry.n = n;
- rc = array_append(&item.array, &entry);
- if (rc != SQLITE_OK) {
- goto cleanup;
- }
- }
- if (rc != SQLITE_DONE) {
- vtab_set_error(&p->base, "Error fetching next value in `x in (...)` text expression");
- goto cleanup;
- }
- break;
- }
- default: {
- vtab_set_error(&p->base, "Internal sqlite-vec error");
- goto cleanup;
- }
- }
- rc = array_append(aMetadataIn, &item);
- if(rc != SQLITE_OK) {
- goto cleanup;
- }
- }
- #endif
- rc = vec0_chunks_iter(p, idxStr, argc, argv, &stmtChunks);
- if (rc != SQLITE_OK) {
- // IMP: V06942_23781
- vtab_set_error(&p->base, "Error preparing stmtChunk: %s",
- sqlite3_errmsg(p->db));
- goto cleanup;
- }
- i64 *topk_rowids = NULL;
- f32 *topk_distances = NULL;
- i64 k_used = 0;
- rc = vec0Filter_knn_chunks_iter(p, stmtChunks, vector_column, vectorColumnIdx,
- arrayRowidsIn, aMetadataIn, idxStr, argc, argv, queryVector, k, &topk_rowids,
- &topk_distances, &k_used);
- if (rc != SQLITE_OK) {
- goto cleanup;
- }
- knn_data->current_idx = 0;
- knn_data->k = k;
- knn_data->rowids = topk_rowids;
- knn_data->distances = topk_distances;
- knn_data->k_used = k_used;
- pCur->knn_data = knn_data;
- pCur->query_plan = VEC0_QUERY_PLAN_KNN;
- rc = SQLITE_OK;
- cleanup:
- sqlite3_finalize(stmtChunks);
- array_cleanup(arrayRowidsIn);
- sqlite3_free(arrayRowidsIn);
- queryVectorCleanup(queryVector);
- if(aMetadataIn) {
- for(size_t i = 0; i < aMetadataIn->length; i++) {
- struct Vec0MetadataIn* item = &((struct Vec0MetadataIn *) aMetadataIn->z)[i];
- for(size_t j = 0; j < item->array.length; j++) {
- if(p->metadata_columns[item->metadata_idx].kind == VEC0_METADATA_COLUMN_KIND_TEXT) {
- struct Vec0MetadataInTextEntry entry = ((struct Vec0MetadataInTextEntry*)item->array.z)[j];
- sqlite3_free(entry.zString);
- }
- }
- array_cleanup(&item->array);
- }
- array_cleanup(aMetadataIn);
- }
- sqlite3_free(aMetadataIn);
- return rc;
- }
- int vec0Filter_fullscan(vec0_vtab *p, vec0_cursor *pCur) {
- int rc;
- char *zSql;
- struct vec0_query_fullscan_data *fullscan_data;
- fullscan_data = sqlite3_malloc(sizeof(*fullscan_data));
- if (!fullscan_data) {
- return SQLITE_NOMEM;
- }
- memset(fullscan_data, 0, sizeof(*fullscan_data));
- zSql = sqlite3_mprintf(" SELECT rowid "
- " FROM " VEC0_SHADOW_ROWIDS_NAME
- " ORDER by chunk_id, chunk_offset ",
- p->schemaName, p->tableName);
- if (!zSql) {
- rc = SQLITE_NOMEM;
- goto error;
- }
- rc = sqlite3_prepare_v2(p->db, zSql, -1, &fullscan_data->rowids_stmt, NULL);
- sqlite3_free(zSql);
- if (rc != SQLITE_OK) {
- // IMP: V09901_26739
- vtab_set_error(&p->base, "Error preparing rowid scan: %s",
- sqlite3_errmsg(p->db));
- goto error;
- }
- rc = sqlite3_step(fullscan_data->rowids_stmt);
- // DONE when there's no rowids, ROW when there are, both "success"
- if (!(rc == SQLITE_ROW || rc == SQLITE_DONE)) {
- goto error;
- }
- fullscan_data->done = rc == SQLITE_DONE;
- pCur->query_plan = VEC0_QUERY_PLAN_FULLSCAN;
- pCur->fullscan_data = fullscan_data;
- return SQLITE_OK;
- error:
- vec0_query_fullscan_data_clear(fullscan_data);
- sqlite3_free(fullscan_data);
- return rc;
- }
- int vec0Filter_point(vec0_cursor *pCur, vec0_vtab *p, int argc,
- sqlite3_value **argv) {
- int rc;
- assert(argc == 1);
- i64 rowid;
- struct vec0_query_point_data *point_data = NULL;
- point_data = sqlite3_malloc(sizeof(*point_data));
- if (!point_data) {
- rc = SQLITE_NOMEM;
- goto error;
- }
- memset(point_data, 0, sizeof(*point_data));
- if (p->pkIsText) {
- rc = vec0_rowid_from_id(p, argv[0], &rowid);
- if (rc == SQLITE_EMPTY) {
- goto eof;
- }
- if (rc != SQLITE_OK) {
- goto error;
- }
- } else {
- rowid = sqlite3_value_int64(argv[0]);
- }
- for (int i = 0; i < p->numVectorColumns; i++) {
- rc = vec0_get_vector_data(p, rowid, i, &point_data->vectors[i], NULL);
- if (rc == SQLITE_EMPTY) {
- goto eof;
- }
- if (rc != SQLITE_OK) {
- goto error;
- }
- }
- point_data->rowid = rowid;
- point_data->done = 0;
- pCur->point_data = point_data;
- pCur->query_plan = VEC0_QUERY_PLAN_POINT;
- return SQLITE_OK;
- eof:
- point_data->rowid = rowid;
- point_data->done = 1;
- pCur->point_data = point_data;
- pCur->query_plan = VEC0_QUERY_PLAN_POINT;
- return SQLITE_OK;
- error:
- vec0_query_point_data_clear(point_data);
- sqlite3_free(point_data);
- return rc;
- }
- static int vec0Filter(sqlite3_vtab_cursor *pVtabCursor, int idxNum,
- const char *idxStr, int argc, sqlite3_value **argv) {
- vec0_vtab *p = (vec0_vtab *)pVtabCursor->pVtab;
- vec0_cursor *pCur = (vec0_cursor *)pVtabCursor;
- vec0_cursor_clear(pCur);
- int idxStrLength = strlen(idxStr);
- if(idxStrLength <= 0) {
- return SQLITE_ERROR;
- }
- if((idxStrLength-1) % 4 != 0) {
- return SQLITE_ERROR;
- }
- int numValueEntries = (idxStrLength-1) / 4;
- if(numValueEntries != argc) {
- return SQLITE_ERROR;
- }
- char query_plan = idxStr[0];
- switch(query_plan) {
- case VEC0_QUERY_PLAN_FULLSCAN:
- return vec0Filter_fullscan(p, pCur);
- case VEC0_QUERY_PLAN_KNN:
- return vec0Filter_knn(pCur, p, idxNum, idxStr, argc, argv);
- case VEC0_QUERY_PLAN_POINT:
- return vec0Filter_point(pCur, p, argc, argv);
- default:
- vtab_set_error(pVtabCursor->pVtab, "unknown idxStr '%s'", idxStr);
- return SQLITE_ERROR;
- }
- }
- static int vec0Rowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid) {
- vec0_cursor *pCur = (vec0_cursor *)cur;
- switch (pCur->query_plan) {
- case VEC0_QUERY_PLAN_FULLSCAN: {
- *pRowid = sqlite3_column_int64(pCur->fullscan_data->rowids_stmt, 0);
- return SQLITE_OK;
- }
- case VEC0_QUERY_PLAN_POINT: {
- *pRowid = pCur->point_data->rowid;
- return SQLITE_OK;
- }
- case VEC0_QUERY_PLAN_KNN: {
- vtab_set_error(cur->pVtab,
- "Internal sqlite-vec error: expected point query plan in "
- "vec0Rowid, found %d",
- pCur->query_plan);
- return SQLITE_ERROR;
- }
- }
- return SQLITE_ERROR;
- }
- static int vec0Next(sqlite3_vtab_cursor *cur) {
- vec0_cursor *pCur = (vec0_cursor *)cur;
- switch (pCur->query_plan) {
- case VEC0_QUERY_PLAN_FULLSCAN: {
- if (!pCur->fullscan_data) {
- return SQLITE_ERROR;
- }
- int rc = sqlite3_step(pCur->fullscan_data->rowids_stmt);
- if (rc == SQLITE_DONE) {
- pCur->fullscan_data->done = 1;
- return SQLITE_OK;
- }
- if (rc == SQLITE_ROW) {
- return SQLITE_OK;
- }
- return SQLITE_ERROR;
- }
- case VEC0_QUERY_PLAN_KNN: {
- if (!pCur->knn_data) {
- return SQLITE_ERROR;
- }
- pCur->knn_data->current_idx++;
- return SQLITE_OK;
- }
- case VEC0_QUERY_PLAN_POINT: {
- if (!pCur->point_data) {
- return SQLITE_ERROR;
- }
- pCur->point_data->done = 1;
- return SQLITE_OK;
- }
- }
- return SQLITE_ERROR;
- }
- static int vec0Eof(sqlite3_vtab_cursor *cur) {
- vec0_cursor *pCur = (vec0_cursor *)cur;
- switch (pCur->query_plan) {
- case VEC0_QUERY_PLAN_FULLSCAN: {
- if (!pCur->fullscan_data) {
- return 1;
- }
- return pCur->fullscan_data->done;
- }
- case VEC0_QUERY_PLAN_KNN: {
- if (!pCur->knn_data) {
- return 1;
- }
- // return (pCur->knn_data->current_idx >= pCur->knn_data->k) ||
- // (pCur->knn_data->distances[pCur->knn_data->current_idx] == FLT_MAX);
- return (pCur->knn_data->current_idx >= pCur->knn_data->k_used);
- }
- case VEC0_QUERY_PLAN_POINT: {
- if (!pCur->point_data) {
- return 1;
- }
- return pCur->point_data->done;
- }
- }
- return 1;
- }
- static int vec0Column_fullscan(vec0_vtab *pVtab, vec0_cursor *pCur,
- sqlite3_context *context, int i) {
- if (!pCur->fullscan_data) {
- sqlite3_result_error(
- context, "Internal sqlite-vec error: fullscan_data is NULL.", -1);
- return SQLITE_ERROR;
- }
- i64 rowid = sqlite3_column_int64(pCur->fullscan_data->rowids_stmt, 0);
- if (i == VEC0_COLUMN_ID) {
- return vec0_result_id(pVtab, context, rowid);
- }
- else if (vec0_column_idx_is_vector(pVtab, i)) {
- void *v;
- int sz;
- int vector_idx = vec0_column_idx_to_vector_idx(pVtab, i);
- int rc = vec0_get_vector_data(pVtab, rowid, vector_idx, &v, &sz);
- if (rc != SQLITE_OK) {
- return rc;
- }
- sqlite3_result_blob(context, v, sz, sqlite3_free);
- sqlite3_result_subtype(context,
- pVtab->vector_columns[vector_idx].element_type);
- }
- else if (i == vec0_column_distance_idx(pVtab)) {
- sqlite3_result_null(context);
- }
- else if(vec0_column_idx_is_partition(pVtab, i)) {
- int partition_idx = vec0_column_idx_to_partition_idx(pVtab, i);
- sqlite3_value * v;
- int rc = vec0_get_partition_value_for_rowid(pVtab, rowid, partition_idx, &v);
- if(rc == SQLITE_OK) {
- sqlite3_result_value(context, v);
- sqlite3_value_free(v);
- }else {
- sqlite3_result_error_code(context, rc);
- }
- }
- else if(vec0_column_idx_is_auxiliary(pVtab, i)) {
- int auxiliary_idx = vec0_column_idx_to_auxiliary_idx(pVtab, i);
- sqlite3_value * v;
- int rc = vec0_get_auxiliary_value_for_rowid(pVtab, rowid, auxiliary_idx, &v);
- if(rc == SQLITE_OK) {
- sqlite3_result_value(context, v);
- sqlite3_value_free(v);
- }else {
- sqlite3_result_error_code(context, rc);
- }
- }
- else if(vec0_column_idx_is_metadata(pVtab, i)) {
- if(sqlite3_vtab_nochange(context)) {
- return SQLITE_OK;
- }
- int metadata_idx = vec0_column_idx_to_metadata_idx(pVtab, i);
- int rc = vec0_result_metadata_value_for_rowid(pVtab, rowid, metadata_idx, context);
- if(rc != SQLITE_OK) {
- // IMP: V15466_32305
- const char * zErr = sqlite3_mprintf(
- "Could not extract metadata value for column %.*s at rowid %lld",
- pVtab->metadata_columns[metadata_idx].name_length,
- pVtab->metadata_columns[metadata_idx].name, rowid
- );
- if(zErr) {
- sqlite3_result_error(context, zErr, -1);
- sqlite3_free((void *) zErr);
- }else {
- sqlite3_result_error_nomem(context);
- }
- }
- }
- return SQLITE_OK;
- }
- static int vec0Column_point(vec0_vtab *pVtab, vec0_cursor *pCur,
- sqlite3_context *context, int i) {
- if (!pCur->point_data) {
- sqlite3_result_error(context,
- "Internal sqlite-vec error: point_data is NULL.", -1);
- return SQLITE_ERROR;
- }
- if (i == VEC0_COLUMN_ID) {
- return vec0_result_id(pVtab, context, pCur->point_data->rowid);
- }
- else if (i == vec0_column_distance_idx(pVtab)) {
- sqlite3_result_null(context);
- return SQLITE_OK;
- }
- else if (vec0_column_idx_is_vector(pVtab, i)) {
- if (sqlite3_vtab_nochange(context)) {
- sqlite3_result_null(context);
- return SQLITE_OK;
- }
- int vector_idx = vec0_column_idx_to_vector_idx(pVtab, i);
- sqlite3_result_blob(
- context, pCur->point_data->vectors[vector_idx],
- vector_column_byte_size(pVtab->vector_columns[vector_idx]),
- SQLITE_TRANSIENT);
- sqlite3_result_subtype(context,
- pVtab->vector_columns[vector_idx].element_type);
- return SQLITE_OK;
- }
- else if(vec0_column_idx_is_partition(pVtab, i)) {
- if(sqlite3_vtab_nochange(context)) {
- return SQLITE_OK;
- }
- int partition_idx = vec0_column_idx_to_partition_idx(pVtab, i);
- i64 rowid = pCur->point_data->rowid;
- sqlite3_value * v;
- int rc = vec0_get_partition_value_for_rowid(pVtab, rowid, partition_idx, &v);
- if(rc == SQLITE_OK) {
- sqlite3_result_value(context, v);
- sqlite3_value_free(v);
- }else {
- sqlite3_result_error_code(context, rc);
- }
- }
- else if(vec0_column_idx_is_auxiliary(pVtab, i)) {
- if(sqlite3_vtab_nochange(context)) {
- return SQLITE_OK;
- }
- i64 rowid = pCur->point_data->rowid;
- int auxiliary_idx = vec0_column_idx_to_auxiliary_idx(pVtab, i);
- sqlite3_value * v;
- int rc = vec0_get_auxiliary_value_for_rowid(pVtab, rowid, auxiliary_idx, &v);
- if(rc == SQLITE_OK) {
- sqlite3_result_value(context, v);
- sqlite3_value_free(v);
- }else {
- sqlite3_result_error_code(context, rc);
- }
- }
- else if(vec0_column_idx_is_metadata(pVtab, i)) {
- if(sqlite3_vtab_nochange(context)) {
- return SQLITE_OK;
- }
- i64 rowid = pCur->point_data->rowid;
- int metadata_idx = vec0_column_idx_to_metadata_idx(pVtab, i);
- int rc = vec0_result_metadata_value_for_rowid(pVtab, rowid, metadata_idx, context);
- if(rc != SQLITE_OK) {
- const char * zErr = sqlite3_mprintf(
- "Could not extract metadata value for column %.*s at rowid %lld",
- pVtab->metadata_columns[metadata_idx].name_length,
- pVtab->metadata_columns[metadata_idx].name, rowid
- );
- if(zErr) {
- sqlite3_result_error(context, zErr, -1);
- sqlite3_free((void *) zErr);
- }else {
- sqlite3_result_error_nomem(context);
- }
- }
- }
- return SQLITE_OK;
- }
- static int vec0Column_knn(vec0_vtab *pVtab, vec0_cursor *pCur,
- sqlite3_context *context, int i) {
- if (!pCur->knn_data) {
- sqlite3_result_error(context,
- "Internal sqlite-vec error: knn_data is NULL.", -1);
- return SQLITE_ERROR;
- }
- if (i == VEC0_COLUMN_ID) {
- i64 rowid = pCur->knn_data->rowids[pCur->knn_data->current_idx];
- return vec0_result_id(pVtab, context, rowid);
- }
- else if (i == vec0_column_distance_idx(pVtab)) {
- sqlite3_result_double(
- context, pCur->knn_data->distances[pCur->knn_data->current_idx]);
- return SQLITE_OK;
- }
- else if (vec0_column_idx_is_vector(pVtab, i)) {
- void *out;
- int sz;
- int vector_idx = vec0_column_idx_to_vector_idx(pVtab, i);
- int rc = vec0_get_vector_data(
- pVtab, pCur->knn_data->rowids[pCur->knn_data->current_idx], vector_idx,
- &out, &sz);
- if (rc != SQLITE_OK) {
- return rc;
- }
- sqlite3_result_blob(context, out, sz, sqlite3_free);
- sqlite3_result_subtype(context,
- pVtab->vector_columns[vector_idx].element_type);
- return SQLITE_OK;
- }
- else if(vec0_column_idx_is_partition(pVtab, i)) {
- int partition_idx = vec0_column_idx_to_partition_idx(pVtab, i);
- i64 rowid = pCur->knn_data->rowids[pCur->knn_data->current_idx];
- sqlite3_value * v;
- int rc = vec0_get_partition_value_for_rowid(pVtab, rowid, partition_idx, &v);
- if(rc == SQLITE_OK) {
- sqlite3_result_value(context, v);
- sqlite3_value_free(v);
- }else {
- sqlite3_result_error_code(context, rc);
- }
- }
- else if(vec0_column_idx_is_auxiliary(pVtab, i)) {
- int auxiliary_idx = vec0_column_idx_to_auxiliary_idx(pVtab, i);
- i64 rowid = pCur->knn_data->rowids[pCur->knn_data->current_idx];
- sqlite3_value * v;
- int rc = vec0_get_auxiliary_value_for_rowid(pVtab, rowid, auxiliary_idx, &v);
- if(rc == SQLITE_OK) {
- sqlite3_result_value(context, v);
- sqlite3_value_free(v);
- }else {
- sqlite3_result_error_code(context, rc);
- }
- }
- else if(vec0_column_idx_is_metadata(pVtab, i)) {
- int metadata_idx = vec0_column_idx_to_metadata_idx(pVtab, i);
- i64 rowid = pCur->knn_data->rowids[pCur->knn_data->current_idx];
- int rc = vec0_result_metadata_value_for_rowid(pVtab, rowid, metadata_idx, context);
- if(rc != SQLITE_OK) {
- const char * zErr = sqlite3_mprintf(
- "Could not extract metadata value for column %.*s at rowid %lld",
- pVtab->metadata_columns[metadata_idx].name_length,
- pVtab->metadata_columns[metadata_idx].name, rowid
- );
- if(zErr) {
- sqlite3_result_error(context, zErr, -1);
- sqlite3_free((void *) zErr);
- }else {
- sqlite3_result_error_nomem(context);
- }
- }
- }
- return SQLITE_OK;
- }
- static int vec0Column(sqlite3_vtab_cursor *cur, sqlite3_context *context,
- int i) {
- vec0_cursor *pCur = (vec0_cursor *)cur;
- vec0_vtab *pVtab = (vec0_vtab *)cur->pVtab;
- switch (pCur->query_plan) {
- case VEC0_QUERY_PLAN_FULLSCAN: {
- return vec0Column_fullscan(pVtab, pCur, context, i);
- }
- case VEC0_QUERY_PLAN_KNN: {
- return vec0Column_knn(pVtab, pCur, context, i);
- }
- case VEC0_QUERY_PLAN_POINT: {
- return vec0Column_point(pVtab, pCur, context, i);
- }
- }
- return SQLITE_OK;
- }
- /**
- * @brief Handles the "insert rowid" step of a row insert operation of a vec0
- * table.
- *
- * This function will insert a new row into the _rowids vec0 shadow table.
- *
- * @param p: virtual table
- * @param idValue: Value containing the inserted rowid/id value.
- * @param rowid: Output rowid, will point to the "real" i64 rowid
- * value that was inserted
- * @return int SQLITE_OK on success, error code on failure
- */
- int vec0Update_InsertRowidStep(vec0_vtab *p, sqlite3_value *idValue,
- i64 *rowid) {
- /**
- * An insert into a vec0 table can happen a few different ways:
- * 1) With default INTEGER primary key: With a supplied i64 rowid
- * 2) With default INTEGER primary key: WITHOUT a supplied rowid
- * 3) With TEXT primary key: supplied text rowid
- */
- int rc;
- // Option 3: vtab has a user-defined TEXT primary key, so ensure a text value
- // is provided.
- if (p->pkIsText) {
- if (sqlite3_value_type(idValue) != SQLITE_TEXT) {
- // IMP: V04200_21039
- vtab_set_error(&p->base,
- "The %s virtual table was declared with a TEXT primary "
- "key, but a non-TEXT value was provided in an INSERT.",
- p->tableName);
- return SQLITE_ERROR;
- }
- return vec0_rowids_insert_id(p, idValue, rowid);
- }
- // Option 1: User supplied a i64 rowid
- if (sqlite3_value_type(idValue) == SQLITE_INTEGER) {
- i64 suppliedRowid = sqlite3_value_int64(idValue);
- rc = vec0_rowids_insert_rowid(p, suppliedRowid);
- if (rc == SQLITE_OK) {
- *rowid = suppliedRowid;
- }
- return rc;
- }
- // Option 2: User did not suppled a rowid
- if (sqlite3_value_type(idValue) != SQLITE_NULL) {
- // IMP: V30855_14925
- vtab_set_error(&p->base,
- "Only integers are allows for primary key values on %s",
- p->tableName);
- return SQLITE_ERROR;
- }
- // NULL to get next auto-incremented value
- return vec0_rowids_insert_id(p, NULL, rowid);
- }
- /**
- * @brief Determines the "next available" chunk position for a newly inserted
- * vec0 row.
- *
- * This operation may insert a new "blank" chunk the _chunks table, if there is
- * no more space in previous chunks.
- *
- * @param p: virtual table
- * @param partitionKeyValues: array of partition key column values, to constrain
- * against any partition key columns.
- * @param chunk_rowid: Output rowid of the chunk in the _chunks virtual table
- * that has the avialabiity.
- * @param chunk_offset: Output the index of the available space insert the
- * chunk, based on the index of the first available validity bit.
- * @param pBlobValidity: Output blob of the validity column of the available
- * chunk. Will be opened with read/write permissions.
- * @param pValidity: Output buffer of the original chunk's validity column.
- * Needs to be cleaned up with sqlite3_free().
- * @return int SQLITE_OK on success, error code on failure
- */
- int vec0Update_InsertNextAvailableStep(
- vec0_vtab *p,
- sqlite3_value ** partitionKeyValues,
- i64 *chunk_rowid, i64 *chunk_offset,
- sqlite3_blob **blobChunksValidity,
- const unsigned char **bufferChunksValidity) {
- int rc;
- i64 validitySize;
- *chunk_offset = -1;
- rc = vec0_get_latest_chunk_rowid(p, chunk_rowid, partitionKeyValues);
- if(rc == SQLITE_EMPTY) {
- goto done;
- }
- if (rc != SQLITE_OK) {
- goto cleanup;
- }
- rc = sqlite3_blob_open(p->db, p->schemaName, p->shadowChunksName, "validity",
- *chunk_rowid, 1, blobChunksValidity);
- if (rc != SQLITE_OK) {
- // IMP: V22053_06123
- vtab_set_error(&p->base,
- VEC_INTERAL_ERROR
- "could not open validity blob on %s.%s.%lld",
- p->schemaName, p->shadowChunksName, *chunk_rowid);
- goto cleanup;
- }
- validitySize = sqlite3_blob_bytes(*blobChunksValidity);
- if (validitySize != p->chunk_size / CHAR_BIT) {
- // IMP: V29362_13432
- vtab_set_error(&p->base,
- VEC_INTERAL_ERROR
- "validity blob size mismatch on "
- "%s.%s.%lld, expected %lld but received %lld.",
- p->schemaName, p->shadowChunksName, *chunk_rowid,
- (i64)(p->chunk_size / CHAR_BIT), validitySize);
- rc = SQLITE_ERROR;
- goto cleanup;
- }
- *bufferChunksValidity = sqlite3_malloc(validitySize);
- if (!(*bufferChunksValidity)) {
- vtab_set_error(&p->base, VEC_INTERAL_ERROR
- "Could not allocate memory for validity bitmap");
- rc = SQLITE_NOMEM;
- goto cleanup;
- }
- rc = sqlite3_blob_read(*blobChunksValidity, (void *)*bufferChunksValidity,
- validitySize, 0);
- if (rc != SQLITE_OK) {
- vtab_set_error(&p->base,
- VEC_INTERAL_ERROR
- "Could not read validity bitmap for %s.%s.%lld",
- p->schemaName, p->shadowChunksName, *chunk_rowid);
- goto cleanup;
- }
- // find the next available offset, ie first `0` in the bitmap.
- for (int i = 0; i < validitySize; i++) {
- if ((*bufferChunksValidity)[i] == 0b11111111)
- continue;
- for (int j = 0; j < CHAR_BIT; j++) {
- if (((((*bufferChunksValidity)[i] >> j) & 1) == 0)) {
- *chunk_offset = (i * CHAR_BIT) + j;
- goto done;
- }
- }
- }
- done:
- // latest chunk was full, so need to create a new one
- if (*chunk_offset == -1) {
- rc = vec0_new_chunk(p, partitionKeyValues, chunk_rowid);
- if (rc != SQLITE_OK) {
- // IMP: V08441_25279
- vtab_set_error(&p->base,
- VEC_INTERAL_ERROR "Could not insert a new vector chunk");
- rc = SQLITE_ERROR; // otherwise raises a DatabaseError and not operational
- // error?
- goto cleanup;
- }
- *chunk_offset = 0;
- // blobChunksValidity and pValidity are stale, pointing to the previous
- // (full) chunk. to re-assign them
- rc = sqlite3_blob_close(*blobChunksValidity);
- sqlite3_free((void *)*bufferChunksValidity);
- *blobChunksValidity = NULL;
- *bufferChunksValidity = NULL;
- if (rc != SQLITE_OK) {
- vtab_set_error(&p->base, VEC_INTERAL_ERROR
- "unknown error, blobChunksValidity could not be closed, "
- "please file an issue.");
- rc = SQLITE_ERROR;
- goto cleanup;
- }
- rc = sqlite3_blob_open(p->db, p->schemaName, p->shadowChunksName,
- "validity", *chunk_rowid, 1, blobChunksValidity);
- if (rc != SQLITE_OK) {
- vtab_set_error(
- &p->base,
- VEC_INTERAL_ERROR
- "Could not open validity blob for newly created chunk %s.%s.%lld",
- p->schemaName, p->shadowChunksName, *chunk_rowid);
- goto cleanup;
- }
- validitySize = sqlite3_blob_bytes(*blobChunksValidity);
- if (validitySize != p->chunk_size / CHAR_BIT) {
- vtab_set_error(&p->base,
- VEC_INTERAL_ERROR
- "validity blob size mismatch for newly created chunk "
- "%s.%s.%lld. Exepcted %lld, got %lld",
- p->schemaName, p->shadowChunksName, *chunk_rowid,
- p->chunk_size / CHAR_BIT, validitySize);
- goto cleanup;
- }
- *bufferChunksValidity = sqlite3_malloc(validitySize);
- rc = sqlite3_blob_read(*blobChunksValidity, (void *)*bufferChunksValidity,
- validitySize, 0);
- if (rc != SQLITE_OK) {
- vtab_set_error(&p->base,
- VEC_INTERAL_ERROR
- "could not read validity blob newly created chunk "
- "%s.%s.%lld",
- p->schemaName, p->shadowChunksName, *chunk_rowid);
- goto cleanup;
- }
- }
- rc = SQLITE_OK;
- cleanup:
- return rc;
- }
- /**
- * @brief Write the vector data into the provided vector blob at the given
- * offset
- *
- * @param blobVectors SQLite BLOB to write to
- * @param chunk_offset the "offset" (ie validity bitmap position) to write the
- * vector to
- * @param bVector pointer to the vector containing data
- * @param dimensions how many dimensions the vector has
- * @param element_type the vector type
- * @return result of sqlite3_blob_write, SQLITE_OK on success, otherwise failure
- */
- static int
- vec0_write_vector_to_vector_blob(sqlite3_blob *blobVectors, i64 chunk_offset,
- const void *bVector, size_t dimensions,
- enum VectorElementType element_type) {
- int n;
- int offset;
- switch (element_type) {
- case SQLITE_VEC_ELEMENT_TYPE_FLOAT32:
- n = dimensions * sizeof(f32);
- offset = chunk_offset * dimensions * sizeof(f32);
- break;
- case SQLITE_VEC_ELEMENT_TYPE_INT8:
- n = dimensions * sizeof(i8);
- offset = chunk_offset * dimensions * sizeof(i8);
- break;
- case SQLITE_VEC_ELEMENT_TYPE_BIT:
- n = dimensions / CHAR_BIT;
- offset = chunk_offset * dimensions / CHAR_BIT;
- break;
- }
- return sqlite3_blob_write(blobVectors, bVector, n, offset);
- }
- /**
- * @brief
- *
- * @param p vec0 virtual table
- * @param chunk_rowid: which chunk to write to
- * @param chunk_offset: the offset inside the chunk to write the vector to.
- * @param rowid: the rowid of the inserting row
- * @param vectorDatas: array of the vector data to insert
- * @param blobValidity: writeable validity blob of the row's assigned chunk.
- * @param validity: snapshot buffer of the valdity column from the row's
- * assigned chunk.
- * @return int SQLITE_OK on success, error code on failure
- */
- int vec0Update_InsertWriteFinalStep(vec0_vtab *p, i64 chunk_rowid,
- i64 chunk_offset, i64 rowid,
- void *vectorDatas[],
- sqlite3_blob *blobChunksValidity,
- const unsigned char *bufferChunksValidity) {
- int rc, brc;
- sqlite3_blob *blobChunksRowids = NULL;
- // mark the validity bit for this row in the chunk's validity bitmap
- // Get the byte offset of the bitmap
- char unsigned bx = bufferChunksValidity[chunk_offset / CHAR_BIT];
- // set the bit at the chunk_offset position inside that byte
- bx = bx | (1 << (chunk_offset % CHAR_BIT));
- // write that 1 byte
- rc = sqlite3_blob_write(blobChunksValidity, &bx, 1, chunk_offset / CHAR_BIT);
- if (rc != SQLITE_OK) {
- vtab_set_error(&p->base, VEC_INTERAL_ERROR "could not mark validity bit ");
- return rc;
- }
- // Go insert the vector data into the vector chunk shadow tables
- for (int i = 0; i < p->numVectorColumns; i++) {
- sqlite3_blob *blobVectors;
- rc = sqlite3_blob_open(p->db, p->schemaName, p->shadowVectorChunksNames[i],
- "vectors", chunk_rowid, 1, &blobVectors);
- if (rc != SQLITE_OK) {
- vtab_set_error(&p->base, "Error opening vector blob at %s.%s.%lld",
- p->schemaName, p->shadowVectorChunksNames[i], chunk_rowid);
- goto cleanup;
- }
- i64 expected =
- p->chunk_size * vector_column_byte_size(p->vector_columns[i]);
- i64 actual = sqlite3_blob_bytes(blobVectors);
- if (actual != expected) {
- // IMP: V16386_00456
- vtab_set_error(
- &p->base,
- VEC_INTERAL_ERROR
- "vector blob size mismatch on %s.%s.%lld. Expected %lld, actual %lld",
- p->schemaName, p->shadowVectorChunksNames[i], chunk_rowid, expected,
- actual);
- rc = SQLITE_ERROR;
- // already error, can ignore result code
- sqlite3_blob_close(blobVectors);
- goto cleanup;
- };
- rc = vec0_write_vector_to_vector_blob(
- blobVectors, chunk_offset, vectorDatas[i],
- p->vector_columns[i].dimensions, p->vector_columns[i].element_type);
- if (rc != SQLITE_OK) {
- vtab_set_error(&p->base,
- VEC_INTERAL_ERROR
- "could not write vector blob on %s.%s.%lld",
- p->schemaName, p->shadowVectorChunksNames[i], chunk_rowid);
- rc = SQLITE_ERROR;
- // already error, can ignore result code
- sqlite3_blob_close(blobVectors);
- goto cleanup;
- }
- rc = sqlite3_blob_close(blobVectors);
- if (rc != SQLITE_OK) {
- vtab_set_error(&p->base,
- VEC_INTERAL_ERROR
- "could not close vector blob on %s.%s.%lld",
- p->schemaName, p->shadowVectorChunksNames[i], chunk_rowid);
- rc = SQLITE_ERROR;
- goto cleanup;
- }
- }
- // write the new rowid to the rowids column of the _chunks table
- rc = sqlite3_blob_open(p->db, p->schemaName, p->shadowChunksName, "rowids",
- chunk_rowid, 1, &blobChunksRowids);
- if (rc != SQLITE_OK) {
- // IMP: V09221_26060
- vtab_set_error(&p->base,
- VEC_INTERAL_ERROR "could not open rowids blob on %s.%s.%lld",
- p->schemaName, p->shadowChunksName, chunk_rowid);
- goto cleanup;
- }
- i64 expected = p->chunk_size * sizeof(i64);
- i64 actual = sqlite3_blob_bytes(blobChunksRowids);
- if (expected != actual) {
- // IMP: V12779_29618
- vtab_set_error(
- &p->base,
- VEC_INTERAL_ERROR
- "rowids blob size mismatch on %s.%s.%lld. Expected %lld, actual %lld",
- p->schemaName, p->shadowChunksName, chunk_rowid, expected, actual);
- rc = SQLITE_ERROR;
- goto cleanup;
- }
- rc = sqlite3_blob_write(blobChunksRowids, &rowid, sizeof(i64),
- chunk_offset * sizeof(i64));
- if (rc != SQLITE_OK) {
- vtab_set_error(
- &p->base, VEC_INTERAL_ERROR "could not write rowids blob on %s.%s.%lld",
- p->schemaName, p->shadowChunksName, chunk_rowid);
- rc = SQLITE_ERROR;
- goto cleanup;
- }
- // Now with all the vectors inserted, go back and update the _rowids table
- // with the new chunk_rowid/chunk_offset values
- rc = vec0_rowids_update_position(p, rowid, chunk_rowid, chunk_offset);
- cleanup:
- brc = sqlite3_blob_close(blobChunksRowids);
- if ((rc == SQLITE_OK) && (brc != SQLITE_OK)) {
- vtab_set_error(
- &p->base, VEC_INTERAL_ERROR "could not close rowids blob on %s.%s.%lld",
- p->schemaName, p->shadowChunksName, chunk_rowid);
- return brc;
- }
- return rc;
- }
- int vec0_write_metadata_value(vec0_vtab *p, int metadata_column_idx, i64 rowid, i64 chunk_id, i64 chunk_offset, sqlite3_value * v, int isupdate) {
- int rc;
- struct Vec0MetadataColumnDefinition * metadata_column = &p->metadata_columns[metadata_column_idx];
- vec0_metadata_column_kind kind = metadata_column->kind;
- // verify input value matches column type
- switch(kind) {
- case VEC0_METADATA_COLUMN_KIND_BOOLEAN: {
- if(sqlite3_value_type(v) != SQLITE_INTEGER || ((sqlite3_value_int(v) != 0) && (sqlite3_value_int(v) != 1))) {
- rc = SQLITE_ERROR;
- vtab_set_error(&p->base, "Expected 0 or 1 for BOOLEAN metadata column %.*s", metadata_column->name_length, metadata_column->name);
- goto done;
- }
- break;
- }
- case VEC0_METADATA_COLUMN_KIND_INTEGER: {
- if(sqlite3_value_type(v) != SQLITE_INTEGER) {
- rc = SQLITE_ERROR;
- vtab_set_error(&p->base, "Expected integer for INTEGER metadata column %.*s, received %s", metadata_column->name_length, metadata_column->name, type_name(sqlite3_value_type(v)));
- goto done;
- }
- break;
- }
- case VEC0_METADATA_COLUMN_KIND_FLOAT: {
- if(sqlite3_value_type(v) != SQLITE_FLOAT) {
- rc = SQLITE_ERROR;
- vtab_set_error(&p->base, "Expected float for FLOAT metadata column %.*s, received %s", metadata_column->name_length, metadata_column->name, type_name(sqlite3_value_type(v)));
- goto done;
- }
- break;
- }
- case VEC0_METADATA_COLUMN_KIND_TEXT: {
- if(sqlite3_value_type(v) != SQLITE_TEXT) {
- rc = SQLITE_ERROR;
- vtab_set_error(&p->base, "Expected text for TEXT metadata column %.*s, received %s", metadata_column->name_length, metadata_column->name, type_name(sqlite3_value_type(v)));
- goto done;
- }
- break;
- }
- }
- sqlite3_blob * blobValue = NULL;
- rc = sqlite3_blob_open(p->db, p->schemaName, p->shadowMetadataChunksNames[metadata_column_idx], "data", chunk_id, 1, &blobValue);
- if(rc != SQLITE_OK) {
- goto done;
- }
- switch(kind) {
- case VEC0_METADATA_COLUMN_KIND_BOOLEAN: {
- u8 block;
- int value = sqlite3_value_int(v);
- rc = sqlite3_blob_read(blobValue, &block, sizeof(u8), (int) (chunk_offset / CHAR_BIT));
- if(rc != SQLITE_OK) {
- goto done;
- }
- if (value) {
- block |= 1 << (chunk_offset % CHAR_BIT);
- } else {
- block &= ~(1 << (chunk_offset % CHAR_BIT));
- }
- rc = sqlite3_blob_write(blobValue, &block, sizeof(u8), chunk_offset / CHAR_BIT);
- break;
- }
- case VEC0_METADATA_COLUMN_KIND_INTEGER: {
- i64 value = sqlite3_value_int64(v);
- rc = sqlite3_blob_write(blobValue, &value, sizeof(value), chunk_offset * sizeof(i64));
- break;
- }
- case VEC0_METADATA_COLUMN_KIND_FLOAT: {
- double value = sqlite3_value_double(v);
- rc = sqlite3_blob_write(blobValue, &value, sizeof(value), chunk_offset * sizeof(double));
- break;
- }
- case VEC0_METADATA_COLUMN_KIND_TEXT: {
- int prev_n;
- rc = sqlite3_blob_read(blobValue, &prev_n, sizeof(int), chunk_offset * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH);
- if(rc != SQLITE_OK) {
- goto done;
- }
- const char * s = (const char *) sqlite3_value_text(v);
- int n = sqlite3_value_bytes(v);
- u8 view[VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH];
- memset(view, 0, VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH);
- memcpy(view, &n, sizeof(int));
- memcpy(view+4, s, min(n, VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH-4));
- rc = sqlite3_blob_write(blobValue, &view, VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH, chunk_offset * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH);
- if(n > VEC0_METADATA_TEXT_VIEW_DATA_LENGTH) {
- const char * zSql;
- if(isupdate && (prev_n > VEC0_METADATA_TEXT_VIEW_DATA_LENGTH)) {
- zSql = sqlite3_mprintf("UPDATE " VEC0_SHADOW_METADATA_TEXT_DATA_NAME " SET data = ?2 WHERE rowid = ?1", p->schemaName, p->tableName, metadata_column_idx);
- }else {
- zSql = sqlite3_mprintf("INSERT INTO " VEC0_SHADOW_METADATA_TEXT_DATA_NAME " (rowid, data) VALUES (?1, ?2)", p->schemaName, p->tableName, metadata_column_idx);
- }
- if(!zSql) {
- rc = SQLITE_NOMEM;
- goto done;
- }
- sqlite3_stmt * stmt;
- rc = sqlite3_prepare_v2(p->db, zSql, -1, &stmt, NULL);
- if(rc != SQLITE_OK) {
- goto done;
- }
- sqlite3_bind_int64(stmt, 1, rowid);
- sqlite3_bind_text(stmt, 2, s, n, SQLITE_STATIC);
- rc = sqlite3_step(stmt);
- sqlite3_finalize(stmt);
- if(rc != SQLITE_DONE) {
- rc = SQLITE_ERROR;
- goto done;
- }
- }
- else if(prev_n > VEC0_METADATA_TEXT_VIEW_DATA_LENGTH) {
- const char * zSql = sqlite3_mprintf("DELETE FROM " VEC0_SHADOW_METADATA_TEXT_DATA_NAME " WHERE rowid = ?", p->schemaName, p->tableName, metadata_column_idx);
- if(!zSql) {
- rc = SQLITE_NOMEM;
- goto done;
- }
- sqlite3_stmt * stmt;
- rc = sqlite3_prepare_v2(p->db, zSql, -1, &stmt, NULL);
- if(rc != SQLITE_OK) {
- goto done;
- }
- sqlite3_bind_int64(stmt, 1, rowid);
- rc = sqlite3_step(stmt);
- sqlite3_finalize(stmt);
- if(rc != SQLITE_DONE) {
- rc = SQLITE_ERROR;
- goto done;
- }
- }
- break;
- }
- }
- if(rc != SQLITE_OK) {
- }
- rc = sqlite3_blob_close(blobValue);
- if(rc != SQLITE_OK) {
- goto done;
- }
- done:
- return rc;
- }
- /**
- * @brief Handles INSERT INTO operations on a vec0 table.
- *
- * @return int SQLITE_OK on success, otherwise error code on failure
- */
- int vec0Update_Insert(sqlite3_vtab *pVTab, int argc, sqlite3_value **argv,
- sqlite_int64 *pRowid) {
- UNUSED_PARAMETER(argc);
- vec0_vtab *p = (vec0_vtab *)pVTab;
- int rc;
- // Rowid for the inserted row, deterimined by the inserted ID + _rowids shadow
- // table
- i64 rowid;
- // Array to hold the vector data of the inserted row. Individual elements will
- // have a lifetime bound to the argv[..] values.
- void *vectorDatas[VEC0_MAX_VECTOR_COLUMNS];
- // Array to hold cleanup functions for vectorDatas[]
- vector_cleanup cleanups[VEC0_MAX_VECTOR_COLUMNS];
- sqlite3_value * partitionKeyValues[VEC0_MAX_PARTITION_COLUMNS];
- // Rowid of the chunk in the _chunks shadow table that the row will be a part
- // of.
- i64 chunk_rowid;
- // offset within the chunk where the rowid belongs
- i64 chunk_offset;
- // a write-able blob of the validity column for the given chunk. Used to mark
- // validity bit
- sqlite3_blob *blobChunksValidity = NULL;
- // buffer for the valididty column for the given chunk. Maybe not needed here?
- const unsigned char *bufferChunksValidity = NULL;
- int numReadVectors = 0;
- // Read all provided partition key values into partitionKeyValues
- for (int i = 0; i < vec0_num_defined_user_columns(p); i++) {
- if(p->user_column_kinds[i] != SQLITE_VEC0_USER_COLUMN_KIND_PARTITION) {
- continue;
- }
- int partition_key_idx = p->user_column_idxs[i];
- partitionKeyValues[partition_key_idx] = argv[2+VEC0_COLUMN_USERN_START + i];
- int new_value_type = sqlite3_value_type(partitionKeyValues[partition_key_idx]);
- if((new_value_type != SQLITE_NULL) && (new_value_type != p->paritition_columns[partition_key_idx].type)) {
- // IMP: V11454_28292
- vtab_set_error(
- pVTab,
- "Parition key type mismatch: The partition key column %.*s has type %s, but %s was provided.",
- p->paritition_columns[partition_key_idx].name_length,
- p->paritition_columns[partition_key_idx].name,
- type_name(p->paritition_columns[partition_key_idx].type),
- type_name(new_value_type)
- );
- rc = SQLITE_ERROR;
- goto cleanup;
- }
- }
- // read all the inserted vectors into vectorDatas, validate their lengths.
- for (int i = 0; i < vec0_num_defined_user_columns(p); i++) {
- if(p->user_column_kinds[i] != SQLITE_VEC0_USER_COLUMN_KIND_VECTOR) {
- continue;
- }
- int vector_column_idx = p->user_column_idxs[i];
- sqlite3_value *valueVector = argv[2 + VEC0_COLUMN_USERN_START + i];
- size_t dimensions;
- char *pzError;
- enum VectorElementType elementType;
- rc = vector_from_value(valueVector, &vectorDatas[vector_column_idx], &dimensions,
- &elementType, &cleanups[vector_column_idx], &pzError);
- if (rc != SQLITE_OK) {
- // IMP: V06519_23358
- vtab_set_error(
- pVTab, "Inserted vector for the \"%.*s\" column is invalid: %z",
- p->vector_columns[vector_column_idx].name_length, p->vector_columns[vector_column_idx].name, pzError);
- rc = SQLITE_ERROR;
- goto cleanup;
- }
- numReadVectors++;
- if (elementType != p->vector_columns[vector_column_idx].element_type) {
- // IMP: V08221_25059
- vtab_set_error(
- pVTab,
- "Inserted vector for the \"%.*s\" column is expected to be of type "
- "%s, but a %s vector was provided.",
- p->vector_columns[i].name_length, p->vector_columns[i].name,
- vector_subtype_name(p->vector_columns[i].element_type),
- vector_subtype_name(elementType));
- rc = SQLITE_ERROR;
- goto cleanup;
- }
- if (dimensions != p->vector_columns[vector_column_idx].dimensions) {
- // IMP: V01145_17984
- vtab_set_error(
- pVTab,
- "Dimension mismatch for inserted vector for the \"%.*s\" column. "
- "Expected %d dimensions but received %d.",
- p->vector_columns[vector_column_idx].name_length, p->vector_columns[vector_column_idx].name,
- p->vector_columns[vector_column_idx].dimensions, dimensions);
- rc = SQLITE_ERROR;
- goto cleanup;
- }
- }
- // Cannot insert a value in the hidden "distance" column
- if (sqlite3_value_type(argv[2 + vec0_column_distance_idx(p)]) !=
- SQLITE_NULL) {
- // IMP: V24228_08298
- vtab_set_error(pVTab,
- "A value was provided for the hidden \"distance\" column.");
- rc = SQLITE_ERROR;
- goto cleanup;
- }
- // Cannot insert a value in the hidden "k" column
- if (sqlite3_value_type(argv[2 + vec0_column_k_idx(p)]) != SQLITE_NULL) {
- // IMP: V11875_28713
- vtab_set_error(pVTab, "A value was provided for the hidden \"k\" column.");
- rc = SQLITE_ERROR;
- goto cleanup;
- }
- // Step #1: Insert/get a rowid for this row, from the _rowids table.
- rc = vec0Update_InsertRowidStep(p, argv[2 + VEC0_COLUMN_ID], &rowid);
- if (rc != SQLITE_OK) {
- goto cleanup;
- }
- // Step #2: Find the next "available" position in the _chunks table for this
- // row.
- rc = vec0Update_InsertNextAvailableStep(p, partitionKeyValues,
- &chunk_rowid, &chunk_offset,
- &blobChunksValidity,
- &bufferChunksValidity);
- if (rc != SQLITE_OK) {
- goto cleanup;
- }
- // Step #3: With the next available chunk position, write out all the vectors
- // to their specified location.
- rc = vec0Update_InsertWriteFinalStep(p, chunk_rowid, chunk_offset, rowid,
- vectorDatas, blobChunksValidity,
- bufferChunksValidity);
- if (rc != SQLITE_OK) {
- goto cleanup;
- }
- if(p->numAuxiliaryColumns > 0) {
- sqlite3_stmt *stmt;
- sqlite3_str * s = sqlite3_str_new(NULL);
- sqlite3_str_appendf(s, "INSERT INTO " VEC0_SHADOW_AUXILIARY_NAME "(rowid ", p->schemaName, p->tableName);
- for(int i = 0; i < p->numAuxiliaryColumns; i++) {
- sqlite3_str_appendf(s, ", value%02d", i);
- }
- sqlite3_str_appendall(s, ") VALUES (? ");
- for(int i = 0; i < p->numAuxiliaryColumns; i++) {
- sqlite3_str_appendall(s, ", ?");
- }
- sqlite3_str_appendall(s, ")");
- char * zSql = sqlite3_str_finish(s);
- // TODO double check error handling ehre
- if(!zSql) {
- rc = SQLITE_NOMEM;
- goto cleanup;
- }
- rc = sqlite3_prepare_v2(p->db, zSql, -1, &stmt, NULL);
- if(rc != SQLITE_OK) {
- goto cleanup;
- }
- sqlite3_bind_int64(stmt, 1, rowid);
- for (int i = 0; i < vec0_num_defined_user_columns(p); i++) {
- if(p->user_column_kinds[i] != SQLITE_VEC0_USER_COLUMN_KIND_AUXILIARY) {
- continue;
- }
- int auxiliary_key_idx = p->user_column_idxs[i];
- sqlite3_value * v = argv[2+VEC0_COLUMN_USERN_START + i];
- int v_type = sqlite3_value_type(v);
- if(v_type != SQLITE_NULL && (v_type != p->auxiliary_columns[auxiliary_key_idx].type)) {
- sqlite3_finalize(stmt);
- rc = SQLITE_CONSTRAINT;
- vtab_set_error(
- pVTab,
- "Auxiliary column type mismatch: The auxiliary column %.*s has type %s, but %s was provided.",
- p->auxiliary_columns[auxiliary_key_idx].name_length,
- p->auxiliary_columns[auxiliary_key_idx].name,
- type_name(p->auxiliary_columns[auxiliary_key_idx].type),
- type_name(v_type)
- );
- goto cleanup;
- }
- // first 1 is for 1-based indexing on sqlite3_bind_*, second 1 is to account for initial rowid parameter
- sqlite3_bind_value(stmt, 1 + 1 + auxiliary_key_idx, v);
- }
- rc = sqlite3_step(stmt);
- if(rc != SQLITE_DONE) {
- sqlite3_finalize(stmt);
- rc = SQLITE_ERROR;
- goto cleanup;
- }
- sqlite3_finalize(stmt);
- }
- for(int i = 0; i < vec0_num_defined_user_columns(p); i++) {
- if(p->user_column_kinds[i] != SQLITE_VEC0_USER_COLUMN_KIND_METADATA) {
- continue;
- }
- int metadata_idx = p->user_column_idxs[i];
- sqlite3_value *v = argv[2 + VEC0_COLUMN_USERN_START + i];
- rc = vec0_write_metadata_value(p, metadata_idx, rowid, chunk_rowid, chunk_offset, v, 0);
- if(rc != SQLITE_OK) {
- goto cleanup;
- }
- }
- *pRowid = rowid;
- rc = SQLITE_OK;
- cleanup:
- for (int i = 0; i < numReadVectors; i++) {
- cleanups[i](vectorDatas[i]);
- }
- sqlite3_free((void *)bufferChunksValidity);
- int brc = sqlite3_blob_close(blobChunksValidity);
- if ((rc == SQLITE_OK) && (brc != SQLITE_OK)) {
- vtab_set_error(&p->base,
- VEC_INTERAL_ERROR "unknown error, blobChunksValidity could "
- "not be closed, please file an issue");
- return brc;
- }
- return rc;
- }
- int vec0Update_Delete_ClearValidity(vec0_vtab *p, i64 chunk_id,
- u64 chunk_offset) {
- int rc, brc;
- sqlite3_blob *blobChunksValidity = NULL;
- char unsigned bx;
- int validityOffset = chunk_offset / CHAR_BIT;
- // 2. ensure chunks.validity bit is 1, then set to 0
- rc = sqlite3_blob_open(p->db, p->schemaName, p->shadowChunksName, "validity",
- chunk_id, 1, &blobChunksValidity);
- if (rc != SQLITE_OK) {
- // IMP: V26002_10073
- vtab_set_error(&p->base, "could not open validity blob for %s.%s.%lld",
- p->schemaName, p->shadowChunksName, chunk_id);
- return SQLITE_ERROR;
- }
- // will skip the sqlite3_blob_bytes(blobChunksValidity) check for now,
- // the read below would catch it
- rc = sqlite3_blob_read(blobChunksValidity, &bx, sizeof(bx), validityOffset);
- if (rc != SQLITE_OK) {
- // IMP: V21193_05263
- vtab_set_error(
- &p->base, "could not read validity blob for %s.%s.%lld at %d",
- p->schemaName, p->shadowChunksName, chunk_id, validityOffset);
- goto cleanup;
- }
- if (!(bx >> (chunk_offset % CHAR_BIT))) {
- // IMP: V21193_05263
- rc = SQLITE_ERROR;
- vtab_set_error(
- &p->base,
- "vec0 deletion error: validity bit is not set for %s.%s.%lld at %d",
- p->schemaName, p->shadowChunksName, chunk_id, validityOffset);
- goto cleanup;
- }
- char unsigned mask = ~(1 << (chunk_offset % CHAR_BIT));
- char result = bx & mask;
- rc = sqlite3_blob_write(blobChunksValidity, &result, sizeof(bx),
- validityOffset);
- if (rc != SQLITE_OK) {
- vtab_set_error(
- &p->base, "could not write to validity blob for %s.%s.%lld at %d",
- p->schemaName, p->shadowChunksName, chunk_id, validityOffset);
- goto cleanup;
- }
- cleanup:
- brc = sqlite3_blob_close(blobChunksValidity);
- if (rc != SQLITE_OK)
- return rc;
- if (brc != SQLITE_OK) {
- vtab_set_error(&p->base,
- "vec0 deletion error: Error commiting validity blob "
- "transaction on %s.%s.%lld at %d",
- p->schemaName, p->shadowChunksName, chunk_id,
- validityOffset);
- return brc;
- }
- return SQLITE_OK;
- }
- int vec0Update_Delete_DeleteRowids(vec0_vtab *p, i64 rowid) {
- int rc;
- sqlite3_stmt *stmt = NULL;
- char *zSql =
- sqlite3_mprintf("DELETE FROM " VEC0_SHADOW_ROWIDS_NAME " WHERE rowid = ?",
- p->schemaName, p->tableName);
- if (!zSql) {
- return SQLITE_NOMEM;
- }
- rc = sqlite3_prepare_v2(p->db, zSql, -1, &stmt, NULL);
- sqlite3_free(zSql);
- if (rc != SQLITE_OK) {
- goto cleanup;
- }
- sqlite3_bind_int64(stmt, 1, rowid);
- rc = sqlite3_step(stmt);
- if (rc != SQLITE_DONE) {
- goto cleanup;
- }
- rc = SQLITE_OK;
- cleanup:
- sqlite3_finalize(stmt);
- return rc;
- }
- int vec0Update_Delete_DeleteAux(vec0_vtab *p, i64 rowid) {
- int rc;
- sqlite3_stmt *stmt = NULL;
- char *zSql =
- sqlite3_mprintf("DELETE FROM " VEC0_SHADOW_AUXILIARY_NAME " WHERE rowid = ?",
- p->schemaName, p->tableName);
- if (!zSql) {
- return SQLITE_NOMEM;
- }
- rc = sqlite3_prepare_v2(p->db, zSql, -1, &stmt, NULL);
- sqlite3_free(zSql);
- if (rc != SQLITE_OK) {
- goto cleanup;
- }
- sqlite3_bind_int64(stmt, 1, rowid);
- rc = sqlite3_step(stmt);
- if (rc != SQLITE_DONE) {
- goto cleanup;
- }
- rc = SQLITE_OK;
- cleanup:
- sqlite3_finalize(stmt);
- return rc;
- }
- int vec0Update_Delete_ClearMetadata(vec0_vtab *p, int metadata_idx, i64 rowid, i64 chunk_id,
- u64 chunk_offset) {
- int rc;
- sqlite3_blob * blobValue;
- vec0_metadata_column_kind kind = p->metadata_columns[metadata_idx].kind;
- rc = sqlite3_blob_open(p->db, p->schemaName, p->shadowMetadataChunksNames[metadata_idx], "data", chunk_id, 1, &blobValue);
- if(rc != SQLITE_OK) {
- return rc;
- }
- switch(kind) {
- case VEC0_METADATA_COLUMN_KIND_BOOLEAN: {
- u8 block;
- rc = sqlite3_blob_read(blobValue, &block, sizeof(u8), (int) (chunk_offset / CHAR_BIT));
- if(rc != SQLITE_OK) {
- goto done;
- }
- block &= ~(1 << (chunk_offset % CHAR_BIT));
- rc = sqlite3_blob_write(blobValue, &block, sizeof(u8), chunk_offset / CHAR_BIT);
- break;
- }
- case VEC0_METADATA_COLUMN_KIND_INTEGER: {
- i64 v = 0;
- rc = sqlite3_blob_write(blobValue, &v, sizeof(v), chunk_offset * sizeof(i64));
- break;
- }
- case VEC0_METADATA_COLUMN_KIND_FLOAT: {
- double v = 0;
- rc = sqlite3_blob_write(blobValue, &v, sizeof(v), chunk_offset * sizeof(double));
- break;
- }
- case VEC0_METADATA_COLUMN_KIND_TEXT: {
- int n;
- rc = sqlite3_blob_read(blobValue, &n, sizeof(int), chunk_offset * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH);
- if(rc != SQLITE_OK) {
- goto done;
- }
- u8 view[VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH];
- memset(view, 0, VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH);
- rc = sqlite3_blob_write(blobValue, &view, sizeof(view), chunk_offset * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH);
- if(rc != SQLITE_OK) {
- goto done;
- }
- if(n > VEC0_METADATA_TEXT_VIEW_DATA_LENGTH) {
- const char * zSql = sqlite3_mprintf("DELETE FROM " VEC0_SHADOW_METADATA_TEXT_DATA_NAME " WHERE rowid = ?", p->schemaName, p->tableName, metadata_idx);
- if(!zSql) {
- rc = SQLITE_NOMEM;
- goto done;
- }
- sqlite3_stmt * stmt;
- rc = sqlite3_prepare_v2(p->db, zSql, -1, &stmt, NULL);
- if(rc != SQLITE_OK) {
- goto done;
- }
- sqlite3_bind_int64(stmt, 1, rowid);
- rc = sqlite3_step(stmt);
- if(rc != SQLITE_DONE) {
- rc = SQLITE_ERROR;
- goto done;
- }
- sqlite3_finalize(stmt);
- }
- break;
- }
- }
- int rc2;
- done:
- rc2 = sqlite3_blob_close(blobValue);
- if(rc == SQLITE_OK) {
- return rc2;
- }
- return rc;
- }
- int vec0Update_Delete(sqlite3_vtab *pVTab, sqlite3_value *idValue) {
- vec0_vtab *p = (vec0_vtab *)pVTab;
- int rc;
- i64 rowid;
- i64 chunk_id;
- i64 chunk_offset;
- if (p->pkIsText) {
- rc = vec0_rowid_from_id(p, idValue, &rowid);
- if (rc != SQLITE_OK) {
- return rc;
- }
- } else {
- rowid = sqlite3_value_int64(idValue);
- }
- // 1. Find chunk position for given rowid
- // 2. Ensure that validity bit for position is 1, then set to 0
- // 3. Zero out rowid in chunks.rowid
- // 4. Zero out vector data in all vector column chunks
- // 5. Delete value in _rowids table
- // 1. get chunk_id and chunk_offset from _rowids
- rc = vec0_get_chunk_position(p, rowid, NULL, &chunk_id, &chunk_offset);
- if (rc != SQLITE_OK) {
- return rc;
- }
- rc = vec0Update_Delete_ClearValidity(p, chunk_id, chunk_offset);
- if (rc != SQLITE_OK) {
- return rc;
- }
- // 3. zero out rowid in chunks.rowids
- // https://github.com/asg017/sqlite-vec/issues/54
- // 4. zero out any data in vector chunks tables
- // https://github.com/asg017/sqlite-vec/issues/54
- // 5. delete from _rowids table
- rc = vec0Update_Delete_DeleteRowids(p, rowid);
- if (rc != SQLITE_OK) {
- return rc;
- }
- // 6. delete any auxiliary rows
- if(p->numAuxiliaryColumns > 0) {
- rc = vec0Update_Delete_DeleteAux(p, rowid);
- if (rc != SQLITE_OK) {
- return rc;
- }
- }
- // 6. delete metadata
- for(int i = 0; i < p->numMetadataColumns; i++) {
- rc = vec0Update_Delete_ClearMetadata(p, i, rowid, chunk_id, chunk_offset);
- }
- return SQLITE_OK;
- }
- int vec0Update_UpdateAuxColumn(vec0_vtab *p, int auxiliary_column_idx, sqlite3_value * value, i64 rowid) {
- int rc;
- sqlite3_stmt *stmt;
- const char * zSql = sqlite3_mprintf("UPDATE " VEC0_SHADOW_AUXILIARY_NAME " SET value%02d = ? WHERE rowid = ?", p->schemaName, p->tableName, auxiliary_column_idx);
- if(!zSql) {
- return SQLITE_NOMEM;
- }
- rc = sqlite3_prepare_v2(p->db, zSql, -1, &stmt, NULL);
- if(rc != SQLITE_OK) {
- return rc;
- }
- sqlite3_bind_value(stmt, 1, value);
- sqlite3_bind_int64(stmt, 2, rowid);
- rc = sqlite3_step(stmt);
- if(rc != SQLITE_DONE) {
- sqlite3_finalize(stmt);
- return SQLITE_ERROR;
- }
- sqlite3_finalize(stmt);
- return SQLITE_OK;
- }
- int vec0Update_UpdateVectorColumn(vec0_vtab *p, i64 chunk_id, i64 chunk_offset,
- int i, sqlite3_value *valueVector) {
- int rc;
- sqlite3_blob *blobVectors = NULL;
- char *pzError;
- size_t dimensions;
- enum VectorElementType elementType;
- void *vector;
- vector_cleanup cleanup = vector_cleanup_noop;
- // https://github.com/asg017/sqlite-vec/issues/53
- rc = vector_from_value(valueVector, &vector, &dimensions, &elementType,
- &cleanup, &pzError);
- if (rc != SQLITE_OK) {
- // IMP: V15203_32042
- vtab_set_error(
- &p->base, "Updated vector for the \"%.*s\" column is invalid: %z",
- p->vector_columns[i].name_length, p->vector_columns[i].name, pzError);
- rc = SQLITE_ERROR;
- goto cleanup;
- }
- if (elementType != p->vector_columns[i].element_type) {
- // IMP: V03643_20481
- vtab_set_error(
- &p->base,
- "Updated vector for the \"%.*s\" column is expected to be of type "
- "%s, but a %s vector was provided.",
- p->vector_columns[i].name_length, p->vector_columns[i].name,
- vector_subtype_name(p->vector_columns[i].element_type),
- vector_subtype_name(elementType));
- rc = SQLITE_ERROR;
- goto cleanup;
- }
- if (dimensions != p->vector_columns[i].dimensions) {
- // IMP: V25739_09810
- vtab_set_error(
- &p->base,
- "Dimension mismatch for new updated vector for the \"%.*s\" column. "
- "Expected %d dimensions but received %d.",
- p->vector_columns[i].name_length, p->vector_columns[i].name,
- p->vector_columns[i].dimensions, dimensions);
- rc = SQLITE_ERROR;
- goto cleanup;
- }
- rc = sqlite3_blob_open(p->db, p->schemaName, p->shadowVectorChunksNames[i],
- "vectors", chunk_id, 1, &blobVectors);
- if (rc != SQLITE_OK) {
- vtab_set_error(&p->base, "Could not open vectors blob for %s.%s.%lld",
- p->schemaName, p->shadowVectorChunksNames[i], chunk_id);
- goto cleanup;
- }
- rc = vec0_write_vector_to_vector_blob(blobVectors, chunk_offset, vector,
- p->vector_columns[i].dimensions,
- p->vector_columns[i].element_type);
- if (rc != SQLITE_OK) {
- vtab_set_error(&p->base, "Could not write to vectors blob for %s.%s.%lld",
- p->schemaName, p->shadowVectorChunksNames[i], chunk_id);
- goto cleanup;
- }
- cleanup:
- cleanup(vector);
- int brc = sqlite3_blob_close(blobVectors);
- if (rc != SQLITE_OK) {
- return rc;
- }
- if (brc != SQLITE_OK) {
- vtab_set_error(
- &p->base,
- "Could not commit blob transaction for vectors blob for %s.%s.%lld",
- p->schemaName, p->shadowVectorChunksNames[i], chunk_id);
- return brc;
- }
- return SQLITE_OK;
- }
- int vec0Update_Update(sqlite3_vtab *pVTab, int argc, sqlite3_value **argv) {
- UNUSED_PARAMETER(argc);
- vec0_vtab *p = (vec0_vtab *)pVTab;
- int rc;
- i64 chunk_id;
- i64 chunk_offset;
- i64 rowid;
- if (p->pkIsText) {
- const char *a = (const char *)sqlite3_value_text(argv[0]);
- const char *b = (const char *)sqlite3_value_text(argv[1]);
- // IMP: V08886_25725
- if ((sqlite3_value_bytes(argv[0]) != sqlite3_value_bytes(argv[1])) ||
- strncmp(a, b, sqlite3_value_bytes(argv[0])) != 0) {
- vtab_set_error(pVTab,
- "UPDATEs on vec0 primary key values are not allowed.");
- return SQLITE_ERROR;
- }
- rc = vec0_rowid_from_id(p, argv[0], &rowid);
- if (rc != SQLITE_OK) {
- return rc;
- }
- } else {
- rowid = sqlite3_value_int64(argv[0]);
- }
- // 1) get chunk_id and chunk_offset from _rowids
- rc = vec0_get_chunk_position(p, rowid, NULL, &chunk_id, &chunk_offset);
- if (rc != SQLITE_OK) {
- return rc;
- }
- // 2) update any partition key values
- for (int i = 0; i < vec0_num_defined_user_columns(p); i++) {
- if(p->user_column_kinds[i] != SQLITE_VEC0_USER_COLUMN_KIND_PARTITION) {
- continue;
- }
- sqlite3_value * value = argv[2+VEC0_COLUMN_USERN_START + i];
- if(sqlite3_value_nochange(value)) {
- continue;
- }
- vtab_set_error(pVTab, "UPDATE on partition key columns are not supported yet. ");
- return SQLITE_ERROR;
- }
- // 3) handle auxiliary column updates
- for (int i = 0; i < vec0_num_defined_user_columns(p); i++) {
- if(p->user_column_kinds[i] != SQLITE_VEC0_USER_COLUMN_KIND_AUXILIARY) {
- continue;
- }
- int auxiliary_column_idx = p->user_column_idxs[i];
- sqlite3_value * value = argv[2+VEC0_COLUMN_USERN_START + i];
- if(sqlite3_value_nochange(value)) {
- continue;
- }
- rc = vec0Update_UpdateAuxColumn(p, auxiliary_column_idx, value, rowid);
- if(rc != SQLITE_OK) {
- return SQLITE_ERROR;
- }
- }
- // 4) handle metadata column updates
- for (int i = 0; i < vec0_num_defined_user_columns(p); i++) {
- if(p->user_column_kinds[i] != SQLITE_VEC0_USER_COLUMN_KIND_METADATA) {
- continue;
- }
- int metadata_column_idx = p->user_column_idxs[i];
- sqlite3_value * value = argv[2+VEC0_COLUMN_USERN_START + i];
- if(sqlite3_value_nochange(value)) {
- continue;
- }
- rc = vec0_write_metadata_value(p, metadata_column_idx, rowid, chunk_id, chunk_offset, value, 1);
- if(rc != SQLITE_OK) {
- return rc;
- }
- }
- // 5) iterate over all new vectors, update the vectors
- for (int i = 0; i < vec0_num_defined_user_columns(p); i++) {
- if(p->user_column_kinds[i] != SQLITE_VEC0_USER_COLUMN_KIND_VECTOR) {
- continue;
- }
- int vector_idx = p->user_column_idxs[i];
- sqlite3_value *valueVector = argv[2 + VEC0_COLUMN_USERN_START + i];
- // in vec0Column, we check sqlite3_vtab_nochange() on vector columns.
- // If the vector column isn't being changed, we return NULL;
- // That's not great, that means vector columns can never be NULLABLE
- // (bc we cant distinguish if an updated vector is truly NULL or nochange).
- // Also it means that if someone tries to run `UPDATE v SET X = NULL`,
- // we can't effectively detect and raise an error.
- // A better solution would be to use a custom result_type for "empty",
- // but subtypes don't appear to survive xColumn -> xUpdate, it's always 0.
- // So for now, we'll just use NULL and warn people to not SET X = NULL
- // in the docs.
- if (sqlite3_value_type(valueVector) == SQLITE_NULL) {
- continue;
- }
- rc = vec0Update_UpdateVectorColumn(p, chunk_id, chunk_offset, vector_idx,
- valueVector);
- if (rc != SQLITE_OK) {
- return SQLITE_ERROR;
- }
- }
- return SQLITE_OK;
- }
- static int vec0Update(sqlite3_vtab *pVTab, int argc, sqlite3_value **argv,
- sqlite_int64 *pRowid) {
- // DELETE operation
- if (argc == 1 && sqlite3_value_type(argv[0]) != SQLITE_NULL) {
- return vec0Update_Delete(pVTab, argv[0]);
- }
- // INSERT operation
- else if (argc > 1 && sqlite3_value_type(argv[0]) == SQLITE_NULL) {
- return vec0Update_Insert(pVTab, argc, argv, pRowid);
- }
- // UPDATE operation
- else if (argc > 1 && sqlite3_value_type(argv[0]) != SQLITE_NULL) {
- return vec0Update_Update(pVTab, argc, argv);
- } else {
- vtab_set_error(pVTab, "Unrecognized xUpdate operation provided for vec0.");
- return SQLITE_ERROR;
- }
- }
/**
 * @brief Tells whether a table-name suffix is one of the shadow table names
 * listed below (compared case-insensitively with sqlite3_stricmp()).
 *
 * NOTE(review): vector chunk shadow tables are not part of this list.
 *
 * @param zName the suffix to test
 * @return int 1 when zName matches a listed name, 0 otherwise
 */
static int vec0ShadowName(const char *zName) {
  // NULL-terminated so the scan below needs no element count
  static const char *const kShadowNames[] = {
      "rowids",
      "chunks",
      "auxiliary",
      "info",

      // Up to VEC0_MAX_METADATA_COLUMNS
      // TODO be smarter about this man
      "metadatachunks00", "metadatachunks01", "metadatachunks02",
      "metadatachunks03", "metadatachunks04", "metadatachunks05",
      "metadatachunks06", "metadatachunks07", "metadatachunks08",
      "metadatachunks09", "metadatachunks10", "metadatachunks11",
      "metadatachunks12", "metadatachunks13", "metadatachunks14",
      "metadatachunks15",

      // text-data tables, same numbering as above
      "metadatatext00", "metadatatext01", "metadatatext02",
      "metadatatext03", "metadatatext04", "metadatatext05",
      "metadatatext06", "metadatatext07", "metadatatext08",
      "metadatatext09", "metadatatext10", "metadatatext11",
      "metadatatext12", "metadatatext13", "metadatatext14",
      "metadatatext15",

      NULL,
  };

  for (const char *const *candidate = kShadowNames; *candidate != NULL;
       candidate++) {
    if (sqlite3_stricmp(zName, *candidate) == 0) {
      return 1;
    }
  }
  return 0;
}
- static int vec0Begin(sqlite3_vtab *pVTab) {
- UNUSED_PARAMETER(pVTab);
- return SQLITE_OK;
- }
- static int vec0Sync(sqlite3_vtab *pVTab) {
- UNUSED_PARAMETER(pVTab);
- vec0_vtab *p = (vec0_vtab *)pVTab;
- if (p->stmtLatestChunk) {
- sqlite3_finalize(p->stmtLatestChunk);
- p->stmtLatestChunk = NULL;
- }
- if (p->stmtRowidsInsertRowid) {
- sqlite3_finalize(p->stmtRowidsInsertRowid);
- p->stmtRowidsInsertRowid = NULL;
- }
- if (p->stmtRowidsInsertId) {
- sqlite3_finalize(p->stmtRowidsInsertId);
- p->stmtRowidsInsertId = NULL;
- }
- if (p->stmtRowidsUpdatePosition) {
- sqlite3_finalize(p->stmtRowidsUpdatePosition);
- p->stmtRowidsUpdatePosition = NULL;
- }
- if (p->stmtRowidsGetChunkPosition) {
- sqlite3_finalize(p->stmtRowidsGetChunkPosition);
- p->stmtRowidsGetChunkPosition = NULL;
- }
- return SQLITE_OK;
- }
- static int vec0Commit(sqlite3_vtab *pVTab) {
- UNUSED_PARAMETER(pVTab);
- return SQLITE_OK;
- }
- static int vec0Rollback(sqlite3_vtab *pVTab) {
- UNUSED_PARAMETER(pVTab);
- return SQLITE_OK;
- }
- static sqlite3_module vec0Module = {
- /* iVersion */ 3,
- /* xCreate */ vec0Create,
- /* xConnect */ vec0Connect,
- /* xBestIndex */ vec0BestIndex,
- /* xDisconnect */ vec0Disconnect,
- /* xDestroy */ vec0Destroy,
- /* xOpen */ vec0Open,
- /* xClose */ vec0Close,
- /* xFilter */ vec0Filter,
- /* xNext */ vec0Next,
- /* xEof */ vec0Eof,
- /* xColumn */ vec0Column,
- /* xRowid */ vec0Rowid,
- /* xUpdate */ vec0Update,
- /* xBegin */ vec0Begin,
- /* xSync */ vec0Sync,
- /* xCommit */ vec0Commit,
- /* xRollback */ vec0Rollback,
- /* xFindFunction */ 0,
- /* xRename */ 0, // https://github.com/asg017/sqlite-vec/issues/43
- /* xSavepoint */ 0,
- /* xRelease */ 0,
- /* xRollbackTo */ 0,
- /* xShadowName */ vec0ShadowName,
- #if SQLITE_VERSION_NUMBER >= 3044000
- /* xIntegrity */ 0, // https://github.com/asg017/sqlite-vec/issues/44
- #endif
- };
- #pragma endregion
- static char *POINTER_NAME_STATIC_BLOB_DEF = "vec0-static_blob_def";
- struct static_blob_definition {
- void *p;
- size_t dimensions;
- size_t nvectors;
- enum VectorElementType element_type;
- };
- static void vec_static_blob_from_raw(sqlite3_context *context, int argc,
- sqlite3_value **argv) {
- assert(argc == 4);
- struct static_blob_definition *p;
- p = sqlite3_malloc(sizeof(*p));
- if (!p) {
- sqlite3_result_error_nomem(context);
- return;
- }
- memset(p, 0, sizeof(*p));
- p->p = (void *)sqlite3_value_int64(argv[0]);
- p->element_type = SQLITE_VEC_ELEMENT_TYPE_FLOAT32;
- p->dimensions = sqlite3_value_int64(argv[2]);
- p->nvectors = sqlite3_value_int64(argv[3]);
- sqlite3_result_pointer(context, p, POINTER_NAME_STATIC_BLOB_DEF,
- sqlite3_free);
- }
- #pragma region vec_static_blobs() table function
- #define MAX_STATIC_BLOBS 16
- typedef struct static_blob static_blob;
- struct static_blob {
- char *name;
- void *p;
- size_t dimensions;
- size_t nvectors;
- enum VectorElementType element_type;
- };
- typedef struct vec_static_blob_data vec_static_blob_data;
- struct vec_static_blob_data {
- static_blob static_blobs[MAX_STATIC_BLOBS];
- };
- typedef struct vec_static_blobs_vtab vec_static_blobs_vtab;
- struct vec_static_blobs_vtab {
- sqlite3_vtab base;
- vec_static_blob_data *data;
- };
- typedef struct vec_static_blobs_cursor vec_static_blobs_cursor;
- struct vec_static_blobs_cursor {
- sqlite3_vtab_cursor base;
- sqlite3_int64 iRowid;
- };
- static int vec_static_blobsConnect(sqlite3 *db, void *pAux, int argc,
- const char *const *argv,
- sqlite3_vtab **ppVtab, char **pzErr) {
- UNUSED_PARAMETER(argc);
- UNUSED_PARAMETER(argv);
- UNUSED_PARAMETER(pzErr);
- vec_static_blobs_vtab *pNew;
- #define VEC_STATIC_BLOBS_NAME 0
- #define VEC_STATIC_BLOBS_DATA 1
- #define VEC_STATIC_BLOBS_DIMENSIONS 2
- #define VEC_STATIC_BLOBS_COUNT 3
- int rc = sqlite3_declare_vtab(
- db, "CREATE TABLE x(name, data, dimensions hidden, count hidden)");
- if (rc == SQLITE_OK) {
- pNew = sqlite3_malloc(sizeof(*pNew));
- *ppVtab = (sqlite3_vtab *)pNew;
- if (pNew == 0)
- return SQLITE_NOMEM;
- memset(pNew, 0, sizeof(*pNew));
- pNew->data = pAux;
- }
- return rc;
- }
- static int vec_static_blobsDisconnect(sqlite3_vtab *pVtab) {
- vec_static_blobs_vtab *p = (vec_static_blobs_vtab *)pVtab;
- sqlite3_free(p);
- return SQLITE_OK;
- }
- static int vec_static_blobsUpdate(sqlite3_vtab *pVTab, int argc,
- sqlite3_value **argv, sqlite_int64 *pRowid) {
- UNUSED_PARAMETER(pRowid);
- vec_static_blobs_vtab *p = (vec_static_blobs_vtab *)pVTab;
- // DELETE operation
- if (argc == 1 && sqlite3_value_type(argv[0]) != SQLITE_NULL) {
- return SQLITE_ERROR;
- }
- // INSERT operation
- else if (argc > 1 && sqlite3_value_type(argv[0]) == SQLITE_NULL) {
- const char *key =
- (const char *)sqlite3_value_text(argv[2 + VEC_STATIC_BLOBS_NAME]);
- int idx = -1;
- for (int i = 0; i < MAX_STATIC_BLOBS; i++) {
- if (!p->data->static_blobs[i].name) {
- p->data->static_blobs[i].name = sqlite3_mprintf("%s", key);
- idx = i;
- break;
- }
- }
- if (idx < 0)
- abort();
- struct static_blob_definition *def = sqlite3_value_pointer(
- argv[2 + VEC_STATIC_BLOBS_DATA], POINTER_NAME_STATIC_BLOB_DEF);
- p->data->static_blobs[idx].p = def->p;
- p->data->static_blobs[idx].dimensions = def->dimensions;
- p->data->static_blobs[idx].nvectors = def->nvectors;
- p->data->static_blobs[idx].element_type = def->element_type;
- return SQLITE_OK;
- }
- // UPDATE operation
- else if (argc > 1 && sqlite3_value_type(argv[0]) != SQLITE_NULL) {
- return SQLITE_ERROR;
- }
- return SQLITE_ERROR;
- }
- static int vec_static_blobsOpen(sqlite3_vtab *p,
- sqlite3_vtab_cursor **ppCursor) {
- UNUSED_PARAMETER(p);
- vec_static_blobs_cursor *pCur;
- pCur = sqlite3_malloc(sizeof(*pCur));
- if (pCur == 0)
- return SQLITE_NOMEM;
- memset(pCur, 0, sizeof(*pCur));
- *ppCursor = &pCur->base;
- return SQLITE_OK;
- }
- static int vec_static_blobsClose(sqlite3_vtab_cursor *cur) {
- vec_static_blobs_cursor *pCur = (vec_static_blobs_cursor *)cur;
- sqlite3_free(pCur);
- return SQLITE_OK;
- }
- static int vec_static_blobsBestIndex(sqlite3_vtab *pVTab,
- sqlite3_index_info *pIdxInfo) {
- UNUSED_PARAMETER(pVTab);
- pIdxInfo->idxNum = 1;
- pIdxInfo->estimatedCost = (double)10;
- pIdxInfo->estimatedRows = 10;
- return SQLITE_OK;
- }
- static int vec_static_blobsNext(sqlite3_vtab_cursor *cur);
- static int vec_static_blobsFilter(sqlite3_vtab_cursor *pVtabCursor, int idxNum,
- const char *idxStr, int argc,
- sqlite3_value **argv) {
- UNUSED_PARAMETER(idxNum);
- UNUSED_PARAMETER(idxStr);
- UNUSED_PARAMETER(argc);
- UNUSED_PARAMETER(argv);
- vec_static_blobs_cursor *pCur = (vec_static_blobs_cursor *)pVtabCursor;
- pCur->iRowid = -1;
- vec_static_blobsNext(pVtabCursor);
- return SQLITE_OK;
- }
- static int vec_static_blobsRowid(sqlite3_vtab_cursor *cur,
- sqlite_int64 *pRowid) {
- vec_static_blobs_cursor *pCur = (vec_static_blobs_cursor *)cur;
- *pRowid = pCur->iRowid;
- return SQLITE_OK;
- }
- static int vec_static_blobsNext(sqlite3_vtab_cursor *cur) {
- vec_static_blobs_cursor *pCur = (vec_static_blobs_cursor *)cur;
- vec_static_blobs_vtab *p = (vec_static_blobs_vtab *)pCur->base.pVtab;
- pCur->iRowid++;
- while (pCur->iRowid < MAX_STATIC_BLOBS) {
- if (p->data->static_blobs[pCur->iRowid].name) {
- return SQLITE_OK;
- }
- pCur->iRowid++;
- }
- return SQLITE_OK;
- }
- static int vec_static_blobsEof(sqlite3_vtab_cursor *cur) {
- vec_static_blobs_cursor *pCur = (vec_static_blobs_cursor *)cur;
- return pCur->iRowid >= MAX_STATIC_BLOBS;
- }
- static int vec_static_blobsColumn(sqlite3_vtab_cursor *cur,
- sqlite3_context *context, int i) {
- vec_static_blobs_cursor *pCur = (vec_static_blobs_cursor *)cur;
- vec_static_blobs_vtab *p = (vec_static_blobs_vtab *)cur->pVtab;
- switch (i) {
- case VEC_STATIC_BLOBS_NAME:
- sqlite3_result_text(context, p->data->static_blobs[pCur->iRowid].name, -1,
- SQLITE_TRANSIENT);
- break;
- case VEC_STATIC_BLOBS_DATA:
- sqlite3_result_null(context);
- break;
- case VEC_STATIC_BLOBS_DIMENSIONS:
- sqlite3_result_int64(context,
- p->data->static_blobs[pCur->iRowid].dimensions);
- break;
- case VEC_STATIC_BLOBS_COUNT:
- sqlite3_result_int64(context, p->data->static_blobs[pCur->iRowid].nvectors);
- break;
- }
- return SQLITE_OK;
- }
- static sqlite3_module vec_static_blobsModule = {
- /* iVersion */ 3,
- /* xCreate */ 0,
- /* xConnect */ vec_static_blobsConnect,
- /* xBestIndex */ vec_static_blobsBestIndex,
- /* xDisconnect */ vec_static_blobsDisconnect,
- /* xDestroy */ 0,
- /* xOpen */ vec_static_blobsOpen,
- /* xClose */ vec_static_blobsClose,
- /* xFilter */ vec_static_blobsFilter,
- /* xNext */ vec_static_blobsNext,
- /* xEof */ vec_static_blobsEof,
- /* xColumn */ vec_static_blobsColumn,
- /* xRowid */ vec_static_blobsRowid,
- /* xUpdate */ vec_static_blobsUpdate,
- /* xBegin */ 0,
- /* xSync */ 0,
- /* xCommit */ 0,
- /* xRollback */ 0,
- /* xFindMethod */ 0,
- /* xRename */ 0,
- /* xSavepoint */ 0,
- /* xRelease */ 0,
- /* xRollbackTo */ 0,
- /* xShadowName */ 0,
- #if SQLITE_VERSION_NUMBER >= 3044000
- /* xIntegrity */ 0
- #endif
- };
- #pragma endregion
- #pragma region vec_static_blob_entries() table function
- typedef struct vec_static_blob_entries_vtab vec_static_blob_entries_vtab;
- struct vec_static_blob_entries_vtab {
- sqlite3_vtab base;
- static_blob *blob;
- };
/* Query plans of vec_static_blob_entries; the values double as idxNum. */
typedef enum {
  /* Visit every vector in storage order. */
  VEC_SBE__QUERYPLAN_FULLSCAN = 1,
  /* MATCH query limited by LIMIT or k. */
  VEC_SBE__QUERYPLAN_KNN = 2
} vec_sbe_query_plan;
- struct sbe_query_knn_data {
- i64 k;
- i64 k_used;
- // Array of rowids of size k. Must be freed with sqlite3_free().
- i32 *rowids;
- // Array of distances of size k. Must be freed with sqlite3_free().
- f32 *distances;
- i64 current_idx;
- };
- void sbe_query_knn_data_clear(struct sbe_query_knn_data *knn_data) {
- if (!knn_data)
- return;
- if (knn_data->rowids) {
- sqlite3_free(knn_data->rowids);
- knn_data->rowids = NULL;
- }
- if (knn_data->distances) {
- sqlite3_free(knn_data->distances);
- knn_data->distances = NULL;
- }
- }
- typedef struct vec_static_blob_entries_cursor vec_static_blob_entries_cursor;
- struct vec_static_blob_entries_cursor {
- sqlite3_vtab_cursor base;
- sqlite3_int64 iRowid;
- vec_sbe_query_plan query_plan;
- struct sbe_query_knn_data *knn_data;
- };
- static int vec_static_blob_entriesConnect(sqlite3 *db, void *pAux, int argc,
- const char *const *argv,
- sqlite3_vtab **ppVtab, char **pzErr) {
- UNUSED_PARAMETER(argc);
- UNUSED_PARAMETER(argv);
- UNUSED_PARAMETER(pzErr);
- vec_static_blob_data *blob_data = pAux;
- int idx = -1;
- for (int i = 0; i < MAX_STATIC_BLOBS; i++) {
- if (!blob_data->static_blobs[i].name)
- continue;
- if (strncmp(blob_data->static_blobs[i].name, argv[3],
- strlen(blob_data->static_blobs[i].name)) == 0) {
- idx = i;
- break;
- }
- }
- if (idx < 0)
- abort();
- vec_static_blob_entries_vtab *pNew;
- #define VEC_STATIC_BLOB_ENTRIES_VECTOR 0
- #define VEC_STATIC_BLOB_ENTRIES_DISTANCE 1
- #define VEC_STATIC_BLOB_ENTRIES_K 2
- int rc = sqlite3_declare_vtab(
- db, "CREATE TABLE x(vector, distance hidden, k hidden)");
- if (rc == SQLITE_OK) {
- pNew = sqlite3_malloc(sizeof(*pNew));
- *ppVtab = (sqlite3_vtab *)pNew;
- if (pNew == 0)
- return SQLITE_NOMEM;
- memset(pNew, 0, sizeof(*pNew));
- pNew->blob = &blob_data->static_blobs[idx];
- }
- return rc;
- }
- static int vec_static_blob_entriesCreate(sqlite3 *db, void *pAux, int argc,
- const char *const *argv,
- sqlite3_vtab **ppVtab, char **pzErr) {
- return vec_static_blob_entriesConnect(db, pAux, argc, argv, ppVtab, pzErr);
- }
- static int vec_static_blob_entriesDisconnect(sqlite3_vtab *pVtab) {
- vec_static_blob_entries_vtab *p = (vec_static_blob_entries_vtab *)pVtab;
- sqlite3_free(p);
- return SQLITE_OK;
- }
- static int vec_static_blob_entriesOpen(sqlite3_vtab *p,
- sqlite3_vtab_cursor **ppCursor) {
- UNUSED_PARAMETER(p);
- vec_static_blob_entries_cursor *pCur;
- pCur = sqlite3_malloc(sizeof(*pCur));
- if (pCur == 0)
- return SQLITE_NOMEM;
- memset(pCur, 0, sizeof(*pCur));
- *ppCursor = &pCur->base;
- return SQLITE_OK;
- }
- static int vec_static_blob_entriesClose(sqlite3_vtab_cursor *cur) {
- vec_static_blob_entries_cursor *pCur = (vec_static_blob_entries_cursor *)cur;
- sqlite3_free(pCur->knn_data);
- sqlite3_free(pCur);
- return SQLITE_OK;
- }
- static int vec_static_blob_entriesBestIndex(sqlite3_vtab *pVTab,
- sqlite3_index_info *pIdxInfo) {
- vec_static_blob_entries_vtab *p = (vec_static_blob_entries_vtab *)pVTab;
- int iMatchTerm = -1;
- int iLimitTerm = -1;
- // int iRowidTerm = -1; // https://github.com/asg017/sqlite-vec/issues/47
- int iKTerm = -1;
- for (int i = 0; i < pIdxInfo->nConstraint; i++) {
- if (!pIdxInfo->aConstraint[i].usable)
- continue;
- int iColumn = pIdxInfo->aConstraint[i].iColumn;
- int op = pIdxInfo->aConstraint[i].op;
- if (op == SQLITE_INDEX_CONSTRAINT_MATCH &&
- iColumn == VEC_STATIC_BLOB_ENTRIES_VECTOR) {
- if (iMatchTerm > -1) {
- // https://github.com/asg017/sqlite-vec/issues/51
- return SQLITE_ERROR;
- }
- iMatchTerm = i;
- }
- if (op == SQLITE_INDEX_CONSTRAINT_LIMIT) {
- iLimitTerm = i;
- }
- if (op == SQLITE_INDEX_CONSTRAINT_EQ &&
- iColumn == VEC_STATIC_BLOB_ENTRIES_K) {
- iKTerm = i;
- }
- }
- if (iMatchTerm >= 0) {
- if (iLimitTerm < 0 && iKTerm < 0) {
- // https://github.com/asg017/sqlite-vec/issues/51
- return SQLITE_ERROR;
- }
- if (iLimitTerm >= 0 && iKTerm >= 0) {
- return SQLITE_ERROR; // limit or k, not both
- }
- if (pIdxInfo->nOrderBy < 1) {
- vtab_set_error(pVTab, "ORDER BY distance required");
- return SQLITE_CONSTRAINT;
- }
- if (pIdxInfo->nOrderBy > 1) {
- // https://github.com/asg017/sqlite-vec/issues/51
- vtab_set_error(pVTab, "more than 1 ORDER BY clause provided");
- return SQLITE_CONSTRAINT;
- }
- if (pIdxInfo->aOrderBy[0].iColumn != VEC_STATIC_BLOB_ENTRIES_DISTANCE) {
- vtab_set_error(pVTab, "ORDER BY must be on the distance column");
- return SQLITE_CONSTRAINT;
- }
- if (pIdxInfo->aOrderBy[0].desc) {
- vtab_set_error(pVTab,
- "Only ascending in ORDER BY distance clause is supported, "
- "DESC is not supported yet.");
- return SQLITE_CONSTRAINT;
- }
- pIdxInfo->idxNum = VEC_SBE__QUERYPLAN_KNN;
- pIdxInfo->estimatedCost = (double)10;
- pIdxInfo->estimatedRows = 10;
- pIdxInfo->orderByConsumed = 1;
- pIdxInfo->aConstraintUsage[iMatchTerm].argvIndex = 1;
- pIdxInfo->aConstraintUsage[iMatchTerm].omit = 1;
- if (iLimitTerm >= 0) {
- pIdxInfo->aConstraintUsage[iLimitTerm].argvIndex = 2;
- pIdxInfo->aConstraintUsage[iLimitTerm].omit = 1;
- } else {
- pIdxInfo->aConstraintUsage[iKTerm].argvIndex = 2;
- pIdxInfo->aConstraintUsage[iKTerm].omit = 1;
- }
- } else {
- pIdxInfo->idxNum = VEC_SBE__QUERYPLAN_FULLSCAN;
- pIdxInfo->estimatedCost = (double)p->blob->nvectors;
- pIdxInfo->estimatedRows = p->blob->nvectors;
- }
- return SQLITE_OK;
- }
- static int vec_static_blob_entriesFilter(sqlite3_vtab_cursor *pVtabCursor,
- int idxNum, const char *idxStr,
- int argc, sqlite3_value **argv) {
- UNUSED_PARAMETER(idxStr);
- assert(argc >= 0 && argc <= 3);
- vec_static_blob_entries_cursor *pCur =
- (vec_static_blob_entries_cursor *)pVtabCursor;
- vec_static_blob_entries_vtab *p =
- (vec_static_blob_entries_vtab *)pCur->base.pVtab;
- if (idxNum == VEC_SBE__QUERYPLAN_KNN) {
- assert(argc == 2);
- pCur->query_plan = VEC_SBE__QUERYPLAN_KNN;
- struct sbe_query_knn_data *knn_data;
- knn_data = sqlite3_malloc(sizeof(*knn_data));
- if (!knn_data) {
- return SQLITE_NOMEM;
- }
- memset(knn_data, 0, sizeof(*knn_data));
- void *queryVector;
- size_t dimensions;
- enum VectorElementType elementType;
- vector_cleanup cleanup;
- char *err;
- int rc = vector_from_value(argv[0], &queryVector, &dimensions, &elementType,
- &cleanup, &err);
- if (rc != SQLITE_OK) {
- return SQLITE_ERROR;
- }
- if (elementType != p->blob->element_type) {
- return SQLITE_ERROR;
- }
- if (dimensions != p->blob->dimensions) {
- return SQLITE_ERROR;
- }
- i64 k = min(sqlite3_value_int64(argv[1]), (i64)p->blob->nvectors);
- if (k < 0) {
- // HANDLE https://github.com/asg017/sqlite-vec/issues/55
- return SQLITE_ERROR;
- }
- if (k == 0) {
- knn_data->k = 0;
- pCur->knn_data = knn_data;
- return SQLITE_OK;
- }
- size_t bsize = (p->blob->nvectors + 7) & ~7;
- i32 *topk_rowids = sqlite3_malloc(k * sizeof(i32));
- if (!topk_rowids) {
- // HANDLE https://github.com/asg017/sqlite-vec/issues/55
- return SQLITE_ERROR;
- }
- f32 *distances = sqlite3_malloc(bsize * sizeof(f32));
- if (!distances) {
- // HANDLE https://github.com/asg017/sqlite-vec/issues/55
- return SQLITE_ERROR;
- }
- for (size_t i = 0; i < p->blob->nvectors; i++) {
- // https://github.com/asg017/sqlite-vec/issues/52
- float *v = ((float *)p->blob->p) + (i * p->blob->dimensions);
- distances[i] =
- distance_l2_sqr_float(v, (float *)queryVector, &p->blob->dimensions);
- }
- u8 *candidates = bitmap_new(bsize);
- assert(candidates);
- u8 *taken = bitmap_new(bsize);
- assert(taken);
- bitmap_fill(candidates, bsize);
- for (size_t i = bsize; i >= p->blob->nvectors; i--) {
- bitmap_set(candidates, i, 0);
- }
- i32 k_used = 0;
- min_idx(distances, bsize, candidates, topk_rowids, k, taken, &k_used);
- knn_data->current_idx = 0;
- knn_data->distances = distances;
- knn_data->k = k;
- knn_data->rowids = topk_rowids;
- pCur->knn_data = knn_data;
- } else {
- pCur->query_plan = VEC_SBE__QUERYPLAN_FULLSCAN;
- pCur->iRowid = 0;
- }
- return SQLITE_OK;
- }
- static int vec_static_blob_entriesRowid(sqlite3_vtab_cursor *cur,
- sqlite_int64 *pRowid) {
- vec_static_blob_entries_cursor *pCur = (vec_static_blob_entries_cursor *)cur;
- switch (pCur->query_plan) {
- case VEC_SBE__QUERYPLAN_FULLSCAN: {
- *pRowid = pCur->iRowid;
- return SQLITE_OK;
- }
- case VEC_SBE__QUERYPLAN_KNN: {
- i32 rowid = ((i32 *)pCur->knn_data->rowids)[pCur->knn_data->current_idx];
- *pRowid = (sqlite3_int64)rowid;
- return SQLITE_OK;
- }
- }
- return SQLITE_ERROR;
- }
- static int vec_static_blob_entriesNext(sqlite3_vtab_cursor *cur) {
- vec_static_blob_entries_cursor *pCur = (vec_static_blob_entries_cursor *)cur;
- switch (pCur->query_plan) {
- case VEC_SBE__QUERYPLAN_FULLSCAN: {
- pCur->iRowid++;
- return SQLITE_OK;
- }
- case VEC_SBE__QUERYPLAN_KNN: {
- pCur->knn_data->current_idx++;
- return SQLITE_OK;
- }
- }
- return SQLITE_ERROR;
- }
- static int vec_static_blob_entriesEof(sqlite3_vtab_cursor *cur) {
- vec_static_blob_entries_cursor *pCur = (vec_static_blob_entries_cursor *)cur;
- vec_static_blob_entries_vtab *p =
- (vec_static_blob_entries_vtab *)pCur->base.pVtab;
- switch (pCur->query_plan) {
- case VEC_SBE__QUERYPLAN_FULLSCAN: {
- return (size_t)pCur->iRowid >= p->blob->nvectors;
- }
- case VEC_SBE__QUERYPLAN_KNN: {
- return pCur->knn_data->current_idx >= pCur->knn_data->k;
- }
- }
- return SQLITE_ERROR;
- }
- static int vec_static_blob_entriesColumn(sqlite3_vtab_cursor *cur,
- sqlite3_context *context, int i) {
- vec_static_blob_entries_cursor *pCur = (vec_static_blob_entries_cursor *)cur;
- vec_static_blob_entries_vtab *p = (vec_static_blob_entries_vtab *)cur->pVtab;
- switch (pCur->query_plan) {
- case VEC_SBE__QUERYPLAN_FULLSCAN: {
- switch (i) {
- case VEC_STATIC_BLOB_ENTRIES_VECTOR:
- sqlite3_result_blob(
- context,
- ((unsigned char *)p->blob->p) +
- (pCur->iRowid * p->blob->dimensions * sizeof(float)),
- p->blob->dimensions * sizeof(float), SQLITE_TRANSIENT);
- sqlite3_result_subtype(context, p->blob->element_type);
- break;
- }
- return SQLITE_OK;
- }
- case VEC_SBE__QUERYPLAN_KNN: {
- switch (i) {
- case VEC_STATIC_BLOB_ENTRIES_VECTOR: {
- i32 rowid = ((i32 *)pCur->knn_data->rowids)[pCur->knn_data->current_idx];
- sqlite3_result_blob(context,
- ((unsigned char *)p->blob->p) +
- (rowid * p->blob->dimensions * sizeof(float)),
- p->blob->dimensions * sizeof(float),
- SQLITE_TRANSIENT);
- sqlite3_result_subtype(context, p->blob->element_type);
- break;
- }
- }
- return SQLITE_OK;
- }
- }
- return SQLITE_ERROR;
- }
- static sqlite3_module vec_static_blob_entriesModule = {
- /* iVersion */ 3,
- /* xCreate */
- vec_static_blob_entriesCreate, // handle rm?
- // https://github.com/asg017/sqlite-vec/issues/55
- /* xConnect */ vec_static_blob_entriesConnect,
- /* xBestIndex */ vec_static_blob_entriesBestIndex,
- /* xDisconnect */ vec_static_blob_entriesDisconnect,
- /* xDestroy */ vec_static_blob_entriesDisconnect,
- /* xOpen */ vec_static_blob_entriesOpen,
- /* xClose */ vec_static_blob_entriesClose,
- /* xFilter */ vec_static_blob_entriesFilter,
- /* xNext */ vec_static_blob_entriesNext,
- /* xEof */ vec_static_blob_entriesEof,
- /* xColumn */ vec_static_blob_entriesColumn,
- /* xRowid */ vec_static_blob_entriesRowid,
- /* xUpdate */ 0,
- /* xBegin */ 0,
- /* xSync */ 0,
- /* xCommit */ 0,
- /* xRollback */ 0,
- /* xFindMethod */ 0,
- /* xRename */ 0,
- /* xSavepoint */ 0,
- /* xRelease */ 0,
- /* xRollbackTo */ 0,
- /* xShadowName */ 0,
- #if SQLITE_VERSION_NUMBER >= 3044000
- /* xIntegrity */ 0
- #endif
- };
- #pragma endregion
/* Per-feature build tags: the tag text when enabled, "" when not. */
#if defined(SQLITE_VEC_ENABLE_AVX)
#define SQLITE_VEC_DEBUG_BUILD_AVX "avx"
#else
#define SQLITE_VEC_DEBUG_BUILD_AVX ""
#endif

#if defined(SQLITE_VEC_ENABLE_NEON)
#define SQLITE_VEC_DEBUG_BUILD_NEON "neon"
#else
#define SQLITE_VEC_DEBUG_BUILD_NEON ""
#endif

/* Space-separated list of the build tags above. */
#define SQLITE_VEC_DEBUG_BUILD                                                 \
  SQLITE_VEC_DEBUG_BUILD_AVX " " SQLITE_VEC_DEBUG_BUILD_NEON

/* Multi-line text returned by the vec_debug() SQL function. */
#define SQLITE_VEC_DEBUG_STRING                                                \
  "Version: " SQLITE_VEC_VERSION "\n"                                          \
  "Date: " SQLITE_VEC_DATE "\n"                                                \
  "Commit: " SQLITE_VEC_SOURCE "\n"                                            \
  "Build flags: " SQLITE_VEC_DEBUG_BUILD
- SQLITE_VEC_API int sqlite3_vec_init(sqlite3 *db, char **pzErrMsg,
- const sqlite3_api_routines *pApi) {
- #ifndef SQLITE_CORE
- SQLITE_EXTENSION_INIT2(pApi);
- #endif
- int rc = SQLITE_OK;
- #define DEFAULT_FLAGS (SQLITE_UTF8 | SQLITE_INNOCUOUS | SQLITE_DETERMINISTIC)
- rc = sqlite3_create_function_v2(db, "vec_version", 0, DEFAULT_FLAGS,
- SQLITE_VEC_VERSION, _static_text_func, NULL,
- NULL, NULL);
- if (rc != SQLITE_OK) {
- return rc;
- }
- rc = sqlite3_create_function_v2(db, "vec_debug", 0, DEFAULT_FLAGS,
- SQLITE_VEC_DEBUG_STRING, _static_text_func,
- NULL, NULL, NULL);
- if (rc != SQLITE_OK) {
- return rc;
- }
- static struct {
- const char *zFName;
- void (*xFunc)(sqlite3_context *, int, sqlite3_value **);
- int nArg;
- int flags;
- } aFunc[] = {
- // clang-format off
- //{"vec_version", _static_text_func, 0, DEFAULT_FLAGS, (void *) SQLITE_VEC_VERSION },
- //{"vec_debug", _static_text_func, 0, DEFAULT_FLAGS, (void *) SQLITE_VEC_DEBUG_STRING },
- {"vec_distance_l2", vec_distance_l2, 2, DEFAULT_FLAGS | SQLITE_SUBTYPE, },
- {"vec_distance_l1", vec_distance_l1, 2, DEFAULT_FLAGS | SQLITE_SUBTYPE, },
- {"vec_distance_hamming",vec_distance_hamming, 2, DEFAULT_FLAGS | SQLITE_SUBTYPE, },
- {"vec_distance_cosine", vec_distance_cosine, 2, DEFAULT_FLAGS | SQLITE_SUBTYPE, },
- {"vec_length", vec_length, 1, DEFAULT_FLAGS | SQLITE_SUBTYPE, },
- {"vec_type", vec_type, 1, DEFAULT_FLAGS, },
- {"vec_to_json", vec_to_json, 1, DEFAULT_FLAGS | SQLITE_SUBTYPE | SQLITE_RESULT_SUBTYPE, },
- {"vec_add", vec_add, 2, DEFAULT_FLAGS | SQLITE_SUBTYPE | SQLITE_RESULT_SUBTYPE, },
- {"vec_sub", vec_sub, 2, DEFAULT_FLAGS | SQLITE_SUBTYPE | SQLITE_RESULT_SUBTYPE, },
- {"vec_slice", vec_slice, 3, DEFAULT_FLAGS | SQLITE_SUBTYPE | SQLITE_RESULT_SUBTYPE, },
- {"vec_normalize", vec_normalize, 1, DEFAULT_FLAGS | SQLITE_SUBTYPE | SQLITE_RESULT_SUBTYPE, },
- {"vec_f32", vec_f32, 1, DEFAULT_FLAGS | SQLITE_SUBTYPE | SQLITE_RESULT_SUBTYPE, },
- {"vec_bit", vec_bit, 1, DEFAULT_FLAGS | SQLITE_SUBTYPE | SQLITE_RESULT_SUBTYPE, },
- {"vec_int8", vec_int8, 1, DEFAULT_FLAGS | SQLITE_SUBTYPE | SQLITE_RESULT_SUBTYPE, },
- {"vec_quantize_int8", vec_quantize_int8, 2, DEFAULT_FLAGS | SQLITE_SUBTYPE | SQLITE_RESULT_SUBTYPE, },
- {"vec_quantize_binary", vec_quantize_binary, 1, DEFAULT_FLAGS | SQLITE_SUBTYPE | SQLITE_RESULT_SUBTYPE, },
- // clang-format on
- };
- static struct {
- char *name;
- const sqlite3_module *module;
- void *p;
- void (*xDestroy)(void *);
- } aMod[] = {
- // clang-format off
- {"vec0", &vec0Module, NULL, NULL},
- {"vec_each", &vec_eachModule, NULL, NULL},
- // clang-format on
- };
- for (unsigned long i = 0; i < countof(aFunc) && rc == SQLITE_OK; i++) {
- rc = sqlite3_create_function_v2(db, aFunc[i].zFName, aFunc[i].nArg,
- aFunc[i].flags, NULL, aFunc[i].xFunc, NULL,
- NULL, NULL);
- if (rc != SQLITE_OK) {
- *pzErrMsg = sqlite3_mprintf("Error creating function %s: %s",
- aFunc[i].zFName, sqlite3_errmsg(db));
- return rc;
- }
- }
- for (unsigned long i = 0; i < countof(aMod) && rc == SQLITE_OK; i++) {
- rc = sqlite3_create_module_v2(db, aMod[i].name, aMod[i].module, NULL, NULL);
- if (rc != SQLITE_OK) {
- *pzErrMsg = sqlite3_mprintf("Error creating module %s: %s", aMod[i].name,
- sqlite3_errmsg(db));
- return rc;
- }
- }
- return SQLITE_OK;
- }
- #ifndef SQLITE_VEC_OMIT_FS
- SQLITE_VEC_API int sqlite3_vec_numpy_init(sqlite3 *db, char **pzErrMsg,
- const sqlite3_api_routines *pApi) {
- UNUSED_PARAMETER(pzErrMsg);
- #ifndef SQLITE_CORE
- SQLITE_EXTENSION_INIT2(pApi);
- #endif
- int rc = SQLITE_OK;
- rc = sqlite3_create_function_v2(db, "vec_npy_file", 1, SQLITE_RESULT_SUBTYPE,
- NULL, vec_npy_file, NULL, NULL, NULL);
- if(rc != SQLITE_OK) {
- return rc;
- }
- rc = sqlite3_create_module_v2(db, "vec_npy_each", &vec_npy_eachModule, NULL, NULL);
- return rc;
- }
- #endif
- SQLITE_VEC_API int
- sqlite3_vec_static_blobs_init(sqlite3 *db, char **pzErrMsg,
- const sqlite3_api_routines *pApi) {
- UNUSED_PARAMETER(pzErrMsg);
- #ifndef SQLITE_CORE
- SQLITE_EXTENSION_INIT2(pApi);
- #endif
- int rc = SQLITE_OK;
- vec_static_blob_data *static_blob_data;
- static_blob_data = sqlite3_malloc(sizeof(*static_blob_data));
- if (!static_blob_data) {
- return SQLITE_NOMEM;
- }
- memset(static_blob_data, 0, sizeof(*static_blob_data));
- rc = sqlite3_create_function_v2(
- db, "vec_static_blob_from_raw", 4,
- DEFAULT_FLAGS | SQLITE_SUBTYPE | SQLITE_RESULT_SUBTYPE, NULL,
- vec_static_blob_from_raw, NULL, NULL, NULL);
- if (rc != SQLITE_OK)
- return rc;
- rc = sqlite3_create_module_v2(db, "vec_static_blobs", &vec_static_blobsModule,
- static_blob_data, sqlite3_free);
- if (rc != SQLITE_OK)
- return rc;
- rc = sqlite3_create_module_v2(db, "vec_static_blob_entries",
- &vec_static_blob_entriesModule,
- static_blob_data, NULL);
- if (rc != SQLITE_OK)
- return rc;
- return rc;
- }
|