postgresqlPackages.vectorchord: init at 0.4.2

This commit is contained in:
Diogo Correia
2025-03-24 13:04:31 +00:00
parent 136519ba0a
commit ab82f5a511
4 changed files with 264 additions and 0 deletions

View File

@@ -0,0 +1,29 @@
diff --git a/crates/simd/build.rs b/crates/simd/build.rs
index 12ce198..aed5588 100644
--- a/crates/simd/build.rs
+++ b/crates/simd/build.rs
@@ -17,17 +17,24 @@ use std::error::Error;
fn main() -> Result<(), Box<dyn Error>> {
println!("cargo::rerun-if-changed=cshim");
+ println!("cargo:rerun-if-env-changed=BINDGEN_EXTRA_CLANG_ARGS");
let target_arch = var("CARGO_CFG_TARGET_ARCH")?;
match target_arch.as_str() {
"aarch64" => {
let mut build = cc::Build::new();
build.file("./cshim/aarch64.c");
+ build.compiler("@clang@");
+ // read env var set by rustPlatform.bindgenHook
+ build.try_flags_from_environment("BINDGEN_EXTRA_CLANG_ARGS").expect("the BINDGEN_EXTRA_CLANG_ARGS environment variable must be specified and UTF-8");
build.opt_level(3);
build.compile("simd_cshim");
}
"x86_64" => {
let mut build = cc::Build::new();
build.file("./cshim/x86_64.c");
+ build.compiler("@clang@");
+ // read env var set by rustPlatform.bindgenHook
+ build.try_flags_from_environment("BINDGEN_EXTRA_CLANG_ARGS").expect("the BINDGEN_EXTRA_CLANG_ARGS environment variable must be specified and UTF-8");
build.opt_level(3);
build.compile("simd_cshim");
}

View File

@@ -0,0 +1,24 @@
diff --git a/crates/algorithm/src/lib.rs b/crates/algorithm/src/lib.rs
index 853a280..f88acbf 100644
--- a/crates/algorithm/src/lib.rs
+++ b/crates/algorithm/src/lib.rs
@@ -13,6 +13,7 @@
// Copyright (c) 2025 TensorChord Inc.
#![feature(select_unpredictable)]
+#![feature(let_chains)]
#![allow(clippy::type_complexity)]
mod build;
diff --git a/src/lib.rs b/src/lib.rs
index 654b4d1..2b11d03 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -13,6 +13,7 @@
// Copyright (c) 2025 TensorChord Inc.
#![allow(unsafe_code)]
+#![feature(let_chains)]
mod datatype;
mod index;

View File

@@ -0,0 +1,65 @@
diff --git a/crates/algorithm/src/operator.rs b/crates/algorithm/src/operator.rs
index 7de8d07..c496dcd 100644
--- a/crates/algorithm/src/operator.rs
+++ b/crates/algorithm/src/operator.rs
@@ -672,7 +672,7 @@ impl Operator for Op<VectOwned<f32>, L2> {
use std::iter::zip;
let dims = vector.dims();
let t = zip(&code.1, centroid.slice())
- .map(|(&sign, &num)| std::hint::select_unpredictable(sign, num, -num))
+ .map(|(&sign, &num)| sign.select_unpredictable(num, -num))
.sum::<f32>()
/ (dims as f32).sqrt();
let sum_of_x_2 = code.0.dis_u_2;
@@ -763,7 +763,7 @@ impl Operator for Op<VectOwned<f32>, Dot> {
use std::iter::zip;
let dims = vector.dims();
let t = zip(&code.1, centroid.slice())
- .map(|(&sign, &num)| std::hint::select_unpredictable(sign, num, -num))
+ .map(|(&sign, &num)| sign.select_unpredictable(num, -num))
.sum::<f32>()
/ (dims as f32).sqrt();
let sum_of_x_2 = code.0.dis_u_2;
@@ -854,7 +854,7 @@ impl Operator for Op<VectOwned<f16>, L2> {
use std::iter::zip;
let dims = vector.dims();
let t = zip(&code.1, centroid.slice())
- .map(|(&sign, &num)| std::hint::select_unpredictable(sign, num, -num).to_f32())
+ .map(|(&sign, &num)| sign.select_unpredictable(num, -num).to_f32())
.sum::<f32>()
/ (dims as f32).sqrt();
let sum_of_x_2 = code.0.dis_u_2;
@@ -945,7 +945,7 @@ impl Operator for Op<VectOwned<f16>, Dot> {
use std::iter::zip;
let dims = vector.dims();
let t = zip(&code.1, centroid.slice())
- .map(|(&sign, &num)| std::hint::select_unpredictable(sign, num, -num).to_f32())
+ .map(|(&sign, &num)| sign.select_unpredictable(num, -num).to_f32())
.sum::<f32>()
/ (dims as f32).sqrt();
let sum_of_x_2 = code.0.dis_u_2;
diff --git a/crates/simd/src/rotate.rs b/crates/simd/src/rotate.rs
index 7a211e5..0fcd955 100644
--- a/crates/simd/src/rotate.rs
+++ b/crates/simd/src/rotate.rs
@@ -31,18 +31,17 @@ pub fn givens(lhs: &mut [f32], rhs: &mut [f32]) {
pub mod flip {
#[crate::multiversion("v4", "v3", "v2", "a2")]
pub fn flip(bits: &[u64; 1024], result: &mut [f32]) {
- use std::hint::select_unpredictable;
let result: &mut [u32] = unsafe { std::mem::transmute(result) };
let (slice, remainder) = result.as_chunks_mut::<64>();
let n = slice.len();
assert!(n <= 1024);
for i in 0..n {
for j in 0..64 {
- slice[i][j] ^= select_unpredictable((bits[i] & (1 << j)) != 0, 0x80000000, 0);
+ slice[i][j] ^= ((bits[i] & (1 << j)) != 0).select_unpredictable(0x80000000, 0);
}
}
for j in 0..remainder.len() {
- remainder[j] ^= select_unpredictable((bits[n] & (1 << j)) != 0, 0x80000000, 0);
+ remainder[j] ^= ((bits[n] & (1 << j)) != 0).select_unpredictable(0x80000000, 0);
}
}
}

View File

@@ -0,0 +1,146 @@
# Package expression for VectorChord, a PostgreSQL extension built with pgrx.
# Upstream is patched so that it builds with the Rust toolchain used here
# instead of the nightly toolchain upstream develops against.
{
buildPgrxExtension,
cargo-pgrx_0_14_1,
clang,
fetchFromGitHub,
lib,
nix-update-script,
postgresql,
postgresqlTestExtension,
replaceVars,
rust-jemalloc-sys,
stdenv,
}:
let
buildPgrxExtension' = buildPgrxExtension.override {
# Upstream only works with a fixed version of cargo-pgrx for each release,
# so we're pinning it here to avoid future incompatibility.
cargo-pgrx = cargo-pgrx_0_14_1;
};
# Follow upstream and use rust-jemalloc-sys on Linux aarch64 and x86_64 only.
# https://github.com/tensorchord/VectorChord/blob/0.4.2/Cargo.toml#L43-L44
useSystemJemalloc =
stdenv.hostPlatform.isLinux && (stdenv.hostPlatform.isAarch64 || stdenv.hostPlatform.isx86_64);
# jemalloc is rebuilt with init exec TLS disabled, since it causes issues with postgres.
rust-jemalloc-sys' = (
rust-jemalloc-sys.override (old: {
jemalloc = old.jemalloc.override { disableInitExecTls = true; };
})
);
in
buildPgrxExtension' (finalAttrs: {
inherit postgresql;
pname = "vectorchord";
version = "0.4.2";
src = fetchFromGitHub {
owner = "tensorchord";
repo = "vectorchord";
tag = finalAttrs.version;
hash = "sha256-EdMuSNcWwCBsAY0e3d0WVug1KBWYWldvKStF6cf/uRs=";
};
patches = [
# Tell the `simd` crate to use the flags from the rust bindgen hook.
# The `@clang@` placeholder in the patch is substituted with the path to the clang executable.
(replaceVars ./0001-read-clang-flags-from-environment.diff {
clang = lib.getExe clang;
})
# Add feature flags needed for features not yet stabilised in rustc stable
./0002-add-feature-flags.diff
# The select_unpredictable function was moved from a method on `bool` to `std::hint`
# before it was stabilised.
# This move isn't present in rustc 1.87, but upstream is using nightly so they have already updated their code.
# This patch changes the code to call the method on `bool` instead.
# See https://github.com/rust-lang/rust/pull/139726
./0003-select_unpredictable-on-bool.diff
];
# The patched jemalloc is only added on the platforms selected by useSystemJemalloc.
buildInputs = lib.optionals (useSystemJemalloc) [
rust-jemalloc-sys'
];
useFetchCargoVendor = true;
cargoHash = "sha256-8NwfsJn5dnvog3fexzLmO3v7/3+L7xtv+PHWfCCWoHY=";
# Include upgrade scripts in the final package
# https://github.com/tensorchord/VectorChord/blob/0.4.2/crates/make/src/main.rs#L224
postInstall = ''
cp sql/upgrade/* $out/share/postgresql/extension/
'';
env = {
# Bypass rust nightly features not being available on rust stable
RUSTC_BOOTSTRAP = 1;
};
# This crate does not have the "pg_test" feature
usePgTestCheckFeature = false;
passthru = {
updateScript = nix-update-script { };
tests.extension = postgresqlTestExtension {
inherit (finalAttrs) finalPackage;
withPackages = [ "pgvector" ]; # vectorchord depends on pgvector at runtime
postgresqlExtraSettings = ''
shared_preload_libraries = 'vchord'
'';
# Set up the extension, a five-row table of 3-dimensional vectors and a
# vchordrq index over it, which the asserts below query.
sql = ''
CREATE EXTENSION vchord CASCADE;
CREATE TABLE items (id bigint PRIMARY KEY, embedding vector(3));
INSERT INTO items (id, embedding) VALUES
(1, '[1,2,4]'),
(2, '[1,2,5]'),
(3, '[0,0,3]'),
(4, '[0,0,2]'),
(5, '[0,0,1]');
CREATE INDEX ON items USING vchordrq (embedding vector_l2_ops) WITH (options = $$
residual_quantization = true
[build.internal]
lists = [4096]
spherical_centroids = false
$$);
SET vchordrq.probes = 1;
'';
# Checks, in order: the installed extension version, two exact-distance
# lookups, and one nearest-neighbour query.
asserts = [
{
query = "SELECT extversion FROM pg_extension WHERE extname = 'vchord'";
expected = "'${finalAttrs.version}'";
description = "Expected installed version to match the derivation's version";
}
{
query = "SELECT id FROM items WHERE embedding <-> '[1,2,3]' = 1";
expected = "1";
description = "Expected vector of row with ID=1 to have an euclidean distance from [1,2,3] of 1.";
}
{
query = "SELECT id FROM items WHERE embedding <-> '[1,2,3]' = 2";
expected = "2";
description = "Expected vector of row with ID=2 to have an euclidean distance from [1,2,3] of 2.";
}
{
query = "SELECT id FROM items ORDER BY embedding <-> '[2,3,7]' LIMIT 1";
expected = "2";
description = "Expected vector of row with ID=2 to be the closest to [2,3,7].";
}
];
};
};
meta = {
changelog = "https://github.com/tensorchord/VectorChord/releases/tag/${finalAttrs.version}";
description = "Scalable, fast, and disk-friendly vector search in Postgres, the successor of pgvecto.rs";
homepage = "https://github.com/tensorchord/VectorChord";
license = lib.licenses.agpl3Only; # dual licensed with Elastic License v2 (ELv2)
maintainers = with lib.maintainers; [
diogotcorreia
];
platforms = postgresql.meta.platforms;
};
})