mirror of
https://github.com/CHN-beta/nixpkgs.git
synced 2026-01-12 02:40:31 +08:00
postgresqlPackages.vectorchord: init at 0.4.2
This commit is contained in:
@@ -0,0 +1,29 @@
|
||||
diff --git a/crates/simd/build.rs b/crates/simd/build.rs
|
||||
index 12ce198..aed5588 100644
|
||||
--- a/crates/simd/build.rs
|
||||
+++ b/crates/simd/build.rs
|
||||
@@ -17,17 +17,24 @@ use std::error::Error;
|
||||
|
||||
fn main() -> Result<(), Box<dyn Error>> {
|
||||
println!("cargo::rerun-if-changed=cshim");
|
||||
+ println!("cargo:rerun-if-env-changed=BINDGEN_EXTRA_CLANG_ARGS");
|
||||
let target_arch = var("CARGO_CFG_TARGET_ARCH")?;
|
||||
match target_arch.as_str() {
|
||||
"aarch64" => {
|
||||
let mut build = cc::Build::new();
|
||||
build.file("./cshim/aarch64.c");
|
||||
+ build.compiler("@clang@");
|
||||
+ // read env var set by rustPlatform.bindgenHook
|
||||
+ build.try_flags_from_environment("BINDGEN_EXTRA_CLANG_ARGS").expect("the BINDGEN_EXTRA_CLANG_ARGS environment variable must be specified and UTF-8");
|
||||
build.opt_level(3);
|
||||
build.compile("simd_cshim");
|
||||
}
|
||||
"x86_64" => {
|
||||
let mut build = cc::Build::new();
|
||||
build.file("./cshim/x86_64.c");
|
||||
+ build.compiler("@clang@");
|
||||
+ // read env var set by rustPlatform.bindgenHook
|
||||
+ build.try_flags_from_environment("BINDGEN_EXTRA_CLANG_ARGS").expect("the BINDGEN_EXTRA_CLANG_ARGS environment variable must be specified and UTF-8");
|
||||
build.opt_level(3);
|
||||
build.compile("simd_cshim");
|
||||
}
|
||||
@@ -0,0 +1,24 @@
|
||||
diff --git a/crates/algorithm/src/lib.rs b/crates/algorithm/src/lib.rs
|
||||
index 853a280..f88acbf 100644
|
||||
--- a/crates/algorithm/src/lib.rs
|
||||
+++ b/crates/algorithm/src/lib.rs
|
||||
@@ -13,6 +13,7 @@
|
||||
// Copyright (c) 2025 TensorChord Inc.
|
||||
|
||||
#![feature(select_unpredictable)]
|
||||
+#![feature(let_chains)]
|
||||
#![allow(clippy::type_complexity)]
|
||||
|
||||
mod build;
|
||||
diff --git a/src/lib.rs b/src/lib.rs
|
||||
index 654b4d1..2b11d03 100644
|
||||
--- a/src/lib.rs
|
||||
+++ b/src/lib.rs
|
||||
@@ -13,6 +13,7 @@
|
||||
// Copyright (c) 2025 TensorChord Inc.
|
||||
|
||||
#![allow(unsafe_code)]
|
||||
+#![feature(let_chains)]
|
||||
|
||||
mod datatype;
|
||||
mod index;
|
||||
@@ -0,0 +1,65 @@
|
||||
diff --git a/crates/algorithm/src/operator.rs b/crates/algorithm/src/operator.rs
|
||||
index 7de8d07..c496dcd 100644
|
||||
--- a/crates/algorithm/src/operator.rs
|
||||
+++ b/crates/algorithm/src/operator.rs
|
||||
@@ -672,7 +672,7 @@ impl Operator for Op<VectOwned<f32>, L2> {
|
||||
use std::iter::zip;
|
||||
let dims = vector.dims();
|
||||
let t = zip(&code.1, centroid.slice())
|
||||
- .map(|(&sign, &num)| std::hint::select_unpredictable(sign, num, -num))
|
||||
+ .map(|(&sign, &num)| sign.select_unpredictable(num, -num))
|
||||
.sum::<f32>()
|
||||
/ (dims as f32).sqrt();
|
||||
let sum_of_x_2 = code.0.dis_u_2;
|
||||
@@ -763,7 +763,7 @@ impl Operator for Op<VectOwned<f32>, Dot> {
|
||||
use std::iter::zip;
|
||||
let dims = vector.dims();
|
||||
let t = zip(&code.1, centroid.slice())
|
||||
- .map(|(&sign, &num)| std::hint::select_unpredictable(sign, num, -num))
|
||||
+ .map(|(&sign, &num)| sign.select_unpredictable(num, -num))
|
||||
.sum::<f32>()
|
||||
/ (dims as f32).sqrt();
|
||||
let sum_of_x_2 = code.0.dis_u_2;
|
||||
@@ -854,7 +854,7 @@ impl Operator for Op<VectOwned<f16>, L2> {
|
||||
use std::iter::zip;
|
||||
let dims = vector.dims();
|
||||
let t = zip(&code.1, centroid.slice())
|
||||
- .map(|(&sign, &num)| std::hint::select_unpredictable(sign, num, -num).to_f32())
|
||||
+ .map(|(&sign, &num)| sign.select_unpredictable(num, -num).to_f32())
|
||||
.sum::<f32>()
|
||||
/ (dims as f32).sqrt();
|
||||
let sum_of_x_2 = code.0.dis_u_2;
|
||||
@@ -945,7 +945,7 @@ impl Operator for Op<VectOwned<f16>, Dot> {
|
||||
use std::iter::zip;
|
||||
let dims = vector.dims();
|
||||
let t = zip(&code.1, centroid.slice())
|
||||
- .map(|(&sign, &num)| std::hint::select_unpredictable(sign, num, -num).to_f32())
|
||||
+ .map(|(&sign, &num)| sign.select_unpredictable(num, -num).to_f32())
|
||||
.sum::<f32>()
|
||||
/ (dims as f32).sqrt();
|
||||
let sum_of_x_2 = code.0.dis_u_2;
|
||||
diff --git a/crates/simd/src/rotate.rs b/crates/simd/src/rotate.rs
|
||||
index 7a211e5..0fcd955 100644
|
||||
--- a/crates/simd/src/rotate.rs
|
||||
+++ b/crates/simd/src/rotate.rs
|
||||
@@ -31,18 +31,17 @@ pub fn givens(lhs: &mut [f32], rhs: &mut [f32]) {
|
||||
pub mod flip {
|
||||
#[crate::multiversion("v4", "v3", "v2", "a2")]
|
||||
pub fn flip(bits: &[u64; 1024], result: &mut [f32]) {
|
||||
- use std::hint::select_unpredictable;
|
||||
let result: &mut [u32] = unsafe { std::mem::transmute(result) };
|
||||
let (slice, remainder) = result.as_chunks_mut::<64>();
|
||||
let n = slice.len();
|
||||
assert!(n <= 1024);
|
||||
for i in 0..n {
|
||||
for j in 0..64 {
|
||||
- slice[i][j] ^= select_unpredictable((bits[i] & (1 << j)) != 0, 0x80000000, 0);
|
||||
+ slice[i][j] ^= ((bits[i] & (1 << j)) != 0).select_unpredictable(0x80000000, 0);
|
||||
}
|
||||
}
|
||||
for j in 0..remainder.len() {
|
||||
- remainder[j] ^= select_unpredictable((bits[n] & (1 << j)) != 0, 0x80000000, 0);
|
||||
+ remainder[j] ^= ((bits[n] & (1 << j)) != 0).select_unpredictable(0x80000000, 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
146
pkgs/servers/sql/postgresql/ext/vectorchord/package.nix
Normal file
146
pkgs/servers/sql/postgresql/ext/vectorchord/package.nix
Normal file
@@ -0,0 +1,146 @@
|
||||
{
|
||||
buildPgrxExtension,
|
||||
cargo-pgrx_0_14_1,
|
||||
clang,
|
||||
fetchFromGitHub,
|
||||
lib,
|
||||
nix-update-script,
|
||||
postgresql,
|
||||
postgresqlTestExtension,
|
||||
replaceVars,
|
||||
rust-jemalloc-sys,
|
||||
stdenv,
|
||||
}:
|
||||
let
|
||||
buildPgrxExtension' = buildPgrxExtension.override {
|
||||
# Upstream only works with a fixed version of cargo-pgrx for each release,
|
||||
# so we're pinning it here to avoid future incompatibility.
|
||||
cargo-pgrx = cargo-pgrx_0_14_1;
|
||||
};
|
||||
|
||||
# Follow upstream and use rust-jemalloc-sys on Linux aarch64 and x86_64.
|
||||
# Additionally, disable init exec TLS, since it causes issues with postgres.
|
||||
# https://github.com/tensorchord/VectorChord/blob/0.4.2/Cargo.toml#L43-L44
|
||||
useSystemJemalloc =
|
||||
stdenv.hostPlatform.isLinux && (stdenv.hostPlatform.isAarch64 || stdenv.hostPlatform.isx86_64);
|
||||
rust-jemalloc-sys' = (
|
||||
rust-jemalloc-sys.override (old: {
|
||||
jemalloc = old.jemalloc.override { disableInitExecTls = true; };
|
||||
})
|
||||
);
|
||||
in
|
||||
buildPgrxExtension' (finalAttrs: {
|
||||
inherit postgresql;
|
||||
|
||||
pname = "vectorchord";
|
||||
version = "0.4.2";
|
||||
|
||||
src = fetchFromGitHub {
|
||||
owner = "tensorchord";
|
||||
repo = "vectorchord";
|
||||
tag = finalAttrs.version;
|
||||
hash = "sha256-EdMuSNcWwCBsAY0e3d0WVug1KBWYWldvKStF6cf/uRs=";
|
||||
};
|
||||
|
||||
patches = [
|
||||
# Tell the `simd` crate to use the flags from the rust bindgen hook
|
||||
(replaceVars ./0001-read-clang-flags-from-environment.diff {
|
||||
clang = lib.getExe clang;
|
||||
})
|
||||
# Add feature flags needed for features not yet stabilised in rustc stable
|
||||
./0002-add-feature-flags.diff
|
||||
# The select_unpredictable function was moved from std::bool to std::hint before being stabilized.
|
||||
# This move isn't present in rustc 1.87, but upstream is using nightly so they have already updated their code.
|
||||
# This patch changes the code to use the function on std::bool instead.
|
||||
# See https://github.com/rust-lang/rust/pull/139726
|
||||
./0003-select_unpredictable-on-bool.diff
|
||||
];
|
||||
|
||||
buildInputs = lib.optionals (useSystemJemalloc) [
|
||||
rust-jemalloc-sys'
|
||||
];
|
||||
|
||||
useFetchCargoVendor = true;
|
||||
cargoHash = "sha256-8NwfsJn5dnvog3fexzLmO3v7/3+L7xtv+PHWfCCWoHY=";
|
||||
|
||||
# Include upgrade scripts in the final package
|
||||
# https://github.com/tensorchord/VectorChord/blob/0.4.2/crates/make/src/main.rs#L224
|
||||
postInstall = ''
|
||||
cp sql/upgrade/* $out/share/postgresql/extension/
|
||||
'';
|
||||
|
||||
env = {
|
||||
# Bypass rust nightly features not being available on rust stable
|
||||
RUSTC_BOOTSTRAP = 1;
|
||||
};
|
||||
|
||||
# This crate does not have the "pg_test" feature
|
||||
usePgTestCheckFeature = false;
|
||||
|
||||
passthru = {
|
||||
updateScript = nix-update-script { };
|
||||
|
||||
tests.extension = postgresqlTestExtension {
|
||||
inherit (finalAttrs) finalPackage;
|
||||
withPackages = [ "pgvector" ]; # vectorchord depends on pgvector at runtime
|
||||
postgresqlExtraSettings = ''
|
||||
shared_preload_libraries = 'vchord'
|
||||
'';
|
||||
|
||||
sql = ''
|
||||
CREATE EXTENSION vchord CASCADE;
|
||||
|
||||
CREATE TABLE items (id bigint PRIMARY KEY, embedding vector(3));
|
||||
INSERT INTO items (id, embedding) VALUES
|
||||
(1, '[1,2,4]'),
|
||||
(2, '[1,2,5]'),
|
||||
(3, '[0,0,3]'),
|
||||
(4, '[0,0,2]'),
|
||||
(5, '[0,0,1]');
|
||||
|
||||
CREATE INDEX ON items USING vchordrq (embedding vector_l2_ops) WITH (options = $$
|
||||
residual_quantization = true
|
||||
[build.internal]
|
||||
lists = [4096]
|
||||
spherical_centroids = false
|
||||
$$);
|
||||
|
||||
SET vchordrq.probes = 1;
|
||||
'';
|
||||
|
||||
asserts = [
|
||||
{
|
||||
query = "SELECT extversion FROM pg_extension WHERE extname = 'vchord'";
|
||||
expected = "'${finalAttrs.version}'";
|
||||
description = "Expected installed version to match the derivation's version";
|
||||
}
|
||||
{
|
||||
query = "SELECT id FROM items WHERE embedding <-> '[1,2,3]' = 1";
|
||||
expected = "1";
|
||||
description = "Expected vector of row with ID=1 to have an euclidean distance from [1,2,3] of 1.";
|
||||
}
|
||||
{
|
||||
query = "SELECT id FROM items WHERE embedding <-> '[1,2,3]' = 2";
|
||||
expected = "2";
|
||||
description = "Expected vector of row with ID=2 to have an euclidean distance from [1,2,3] of 2.";
|
||||
}
|
||||
{
|
||||
query = "SELECT id FROM items ORDER BY embedding <-> '[2,3,7]' LIMIT 1";
|
||||
expected = "2";
|
||||
description = "Expected vector of row with ID=2 to be the closest to [2,3,7].";
|
||||
}
|
||||
];
|
||||
};
|
||||
};
|
||||
|
||||
meta = {
|
||||
changelog = "https://github.com/tensorchord/VectorChord/releases/tag/${finalAttrs.version}";
|
||||
description = "Scalable, fast, and disk-friendly vector search in Postgres, the successor of pgvecto.rs";
|
||||
homepage = "https://github.com/tensorchord/VectorChord";
|
||||
license = lib.licenses.agpl3Only; # dual licensed with Elastic License v2 (ELv2)
|
||||
maintainers = with lib.maintainers; [
|
||||
diogotcorreia
|
||||
];
|
||||
platforms = postgresql.meta.platforms;
|
||||
};
|
||||
})
|
||||
Reference in New Issue
Block a user