path: root/gnu/packages/machine-learning.scm
Diffstat (limited to 'gnu/packages/machine-learning.scm')
-rw-r--r--  gnu/packages/machine-learning.scm  476
1 file changed, 121 insertions, 355 deletions
diff --git a/gnu/packages/machine-learning.scm b/gnu/packages/machine-learning.scm
index d673618fec..8df3178bd8 100644
--- a/gnu/packages/machine-learning.scm
+++ b/gnu/packages/machine-learning.scm
@@ -31,6 +31,7 @@
;;; Copyright © 2024, 2025 David Elsing <david.elsing@posteo.net>
;;; Copyright © 2024 Andy Tai <atai@atai.org>
;;; Copyright © 2025 Lapearldot <lapearldot@disroot.org>
+;;; Copyright © 2025 Cayetano Santos <csantosb@inventati.org>
;;;
;;; This file is part of GNU Guix.
;;;
@@ -82,11 +83,6 @@
#:use-module (gnu packages compression)
#:use-module (gnu packages cpp)
#:use-module (gnu packages cran)
- #:use-module (gnu packages crates-check)
- #:use-module (gnu packages crates-crypto)
- #:use-module (gnu packages crates-io)
- #:use-module (gnu packages crates-tls)
- #:use-module (gnu packages crates-web)
#:use-module (gnu packages curl)
#:use-module (gnu packages databases)
#:use-module (gnu packages dejagnu)
@@ -140,6 +136,7 @@
#:use-module (gnu packages sqlite)
#:use-module (gnu packages statistics)
#:use-module (gnu packages swig)
+ #:use-module (gnu packages textutils)
#:use-module (gnu packages time)
#:use-module (gnu packages tls)
#:use-module (gnu packages valgrind)
@@ -277,6 +274,7 @@ CTranslate2, which is a inference engine for transformer models.")
(build-system pyproject-build-system)
(arguments
(list
+ ;; tests: 7636 passed, 3859 skipped, 2 deselected, 69 xfailed, 2 xpassed
#:test-flags
'(list "-k"
(string-append
@@ -288,23 +286,14 @@ CTranslate2, which is a inference engine for transformer models.")
(propagated-inputs (list python-makefun python-multipledispatch
python-numpy python-opt-einsum
python-typing-extensions))
- (native-inputs (list python-black
- python-flake8
- python-isort
- python-nbsphinx
- python-pandas
+ (native-inputs (list python-pandas
python-pillow
python-pyro-api
python-pytest
- python-pytest-xdist
python-requests
python-scipy
- python-setuptools
- python-sphinx
- python-sphinx-gallery
- python-sphinx-rtd-theme
- python-torchvision
- python-wheel))
+ python-setuptools-next
+ python-torchvision))
(home-page "https://github.com/pyro-ppl/funsor")
(synopsis "Tensor-like library for functions and distributions")
(description
@@ -1386,34 +1375,6 @@ and not test_wmt22_references")
and reproducible BLEU, chrF, and TER scores for natural language processing.")
(license license:asl2.0)))
-(define-public rust-safetensors
- (package
- (name "rust-safetensors")
- (version "0.4.3")
- (source
- (origin
- (method url-fetch)
- (uri (crate-uri "safetensors" version))
- (file-name (string-append name "-" version ".tar.gz"))
- (sha256
- (base32 "1fbx56wikqcvqb4y0ym0cys68lj0v3cpanhsy5i13fkz5jr7dvcc"))))
- (build-system cargo-build-system)
- (arguments
- `(#:cargo-inputs
- (("rust-serde" ,rust-serde-1)
- ("rust-serde-json" ,rust-serde-json-1))
- #:cargo-development-inputs
- (("rust-criterion" ,rust-criterion-0.5)
- ("rust-memmap2" ,rust-memmap2-0.9)
- ("rust-proptest" ,rust-proptest-1))))
- (home-page "https://github.com/huggingface/safetensors")
- (synopsis "Simple and safe way to store and distribute tensors")
- (description
- "This package provides a fast (zero-copy) and safe (dedicated) format for
-storing tensors safely, named safetensors. They aim to be safer than their
-@code{PyTorch} counterparts.")
- (license license:asl2.0)))
-
(define-public python-safetensors
(package
(name "python-safetensors")
@@ -1438,31 +1399,15 @@ storing tensors safely, named safetensors. They aim to be safer than their
(unless (member file '("." ".."))
(rename-file (string-append "bindings/python/" file)
file)))
- (scandir "bindings/python"))))))
+ (scandir "bindings/python"))
+ (substitute* "Cargo.toml"
+ (("^path = .*") ""))))))
(build-system cargo-build-system)
(arguments
(list
- #:modules '((guix build cargo-build-system)
- (guix build utils)
- (ice-9 regex)
- (ice-9 textual-ports)
- (srfi srfi-26))
+ #:install-source? #f
#:phases
#~(modify-phases %standard-phases
- (add-after 'unpack-rust-crates 'inject-safetensors
- (lambda _
- (substitute* "Cargo.toml"
- (("\\[dependencies\\]")
- (format #f "[dependencies]~%safetensors = ~s"
- #$(package-version rust-safetensors))))
- (call-with-input-file "Cargo.toml"
- (lambda (port)
- (let* ((content (get-string-all port))
- (top-match (string-match
- "\\[dependencies.safetensors"
- content)))
- (call-with-output-file "Cargo.toml"
- (cut display (match:prefix top-match) <>)))))))
(add-before 'check 'install-rust-library
(lambda _
(copy-file "target/release/libsafetensors_rust.so"
@@ -1492,14 +1437,9 @@ storing tensors safely, named safetensors. They aim to be safer than their
(copy-file "PKG-INFO" (string-append info "/METADATA"))
(copy-recursively
"py_src/safetensors"
- (string-append lib "safetensors"))))))
- #:cargo-inputs
- `(("rust-pyo3" ,rust-pyo3-0.21)
- ("rust-memmap2" ,rust-memmap2-0.9)
- ("rust-safetensors" ,rust-safetensors)
- ("rust-serde-json" ,rust-serde-json-1))))
+ (string-append lib "safetensors"))))))))
(inputs
- (list rust-safetensors))
+ (cargo-inputs 'python-safetensors))
(native-inputs
(list python-h5py
python-minimal
@@ -1509,9 +1449,9 @@ storing tensors safely, named safetensors. They aim to be safer than their
python-pytorch))
(home-page "https://huggingface.co/docs/safetensors")
(synopsis "Simple and safe way to store and distribute tensors")
- (description "This package provides a fast (zero-copy) and safe
-(dedicated) format for storing tensors safely. This package builds upon
-@code{rust-safetensors} and provides Python bindings.")
+ (description
+ "This package provides a fast (zero-copy) and safe (dedicated) format for
+storing tensors safely.")
(license license:asl2.0)))
(define-public python-sentencepiece
@@ -1815,7 +1755,7 @@ operators and standard data types.")
(patches (search-patches "onnx-optimizer-system-library.patch"))
(modules '((guix build utils)))
(snippet '(delete-file-recursively "third_party"))))
- (build-system python-build-system)
+ (build-system pyproject-build-system)
(arguments
;; reuse build system tweaks
(substitute-keyword-arguments (package-arguments onnx)
@@ -1842,8 +1782,9 @@ operators and standard data types.")
" and not test_fuse_transpose")))))))))
(native-inputs
(append
- (list cmake-minimal python-pytest python-pytest-runner
- python-coverage)
+ (list cmake-minimal
+ python-pytest
+ python-setuptools-next)
(filter
(lambda (pkg)
(member (or (%current-target-system)
@@ -2712,7 +2653,7 @@ Covariance Matrix Adaptation Evolution Strategy (CMA-ES) for Python.")
(define-public python-autograd
(package
(name "python-autograd")
- (version "1.7.0")
+ (version "1.8.0")
(source
(origin
(method git-fetch)
@@ -2720,12 +2661,14 @@ Covariance Matrix Adaptation Evolution Strategy (CMA-ES) for Python.")
(url "https://github.com/HIPS/autograd")
(commit (string-append "v" version))))
(sha256
- (base32 "1fpnmm3mzw355iq7w751j4mjfcr0yh324cxidba1l22652gg8r8m"))
+ (base32 "054pkhzz0h9p1jzva8774wb9dj7rvax4rcpr8ava971kbimdr2lk"))
(file-name (git-file-name name version))))
(build-system pyproject-build-system)
(native-inputs
(list python-hatchling
- python-pytest))
+ python-pytest
+ python-pytest-cov
+ python-pytest-xdist))
(propagated-inputs
(list python-future
python-numpy))
@@ -4265,8 +4208,8 @@ project, and it will potentially also do the same for the Lime project.")
(define-public gloo
(let ((version "0.0.0") ; no proper version tag
- (commit "81925d1c674c34f0dc34dd9a0f2151c1b6f701eb")
- (revision "2"))
+ (commit "c7b7b022c124d9643957d9bd55f57ac59fce8fa2")
+ (revision "3"))
(package
(name "gloo")
(version (git-version version revision commit))
@@ -4279,7 +4222,7 @@ project, and it will potentially also do the same for the Lime project.")
(file-name (git-file-name name version))
(sha256
(base32
- "16zs8ndbiv9nppn8bv6lfanzyyssz7g5pawxiqcnafwq3nvxpj9m"))))
+ "0xsp2m2if3g85l0c3cx9l0j3kz36j3kbmz9mai6kchdhrs13r7d5"))))
(build-system cmake-build-system)
(native-inputs
(list googletest))
@@ -4513,7 +4456,8 @@ on quantized 8-bit tensors.")
(srfi srfi-26)))
(snippet
'(begin
- (delete-file-recursively "bench/models")
+ (when (directory-exists? "bench/models")
+ (delete-file-recursively "bench/models"))
;; Remove autogenerated files, which contain the string
;; "Auto-generated file"
(for-each
@@ -4527,30 +4471,26 @@ on quantized 8-bit tensors.")
(get-string-all port)
"Auto-generated file")))
(delete-file path))))
- (scandir dir (negate (cut member <> '("." ".." "simd"))))))
+ (or (scandir dir (negate (cut member <> '("." ".." "simd"))))
+ '())))
(cons*
- "test" "bench" "src/enums" "src/xnnpack"
- "gen" "cmake/gen"
- (filter
- identity
- (map
- (lambda (dir)
- (let ((path
- (string-append "src/" dir "/gen")))
- (and (file-exists? path) path)))
- (scandir "src" (negate (cut member <> '("." ".."))))))))))))
+ "test" "bench" "src/enums" "src/xnnpack" "gen" "cmake/gen"
+ (filter file-exists?
+ (map (cut string-append "src/" <> "/gen")
+ (scandir "src")))))))))
(build-system cmake-build-system)
(arguments
(list
#:build-type "Release" ;; Debugging symbols require a lot of disk space
- #:configure-flags ''("-DXNNPACK_USE_SYSTEM_LIBS=YES"
- "-DBUILD_SHARED_LIBS=ON"
- "-DCMAKE_POSITION_INDEPENDENT_CODE=ON"
- "-DXNNPACK_LIBRARY_TYPE=shared"
- "-DXNNPACK_BUILD_BENCHMARKS=FALSE"
- ;; Tests fail to build with -DXNNPACK_LIBRARY_TYPE=shared:
- ;; https://github.com/google/XNNPACK/issues/6285
- "-DXNNPACK_BUILD_TESTS=OFF")
+ #:configure-flags
+ #~(list "-DXNNPACK_USE_SYSTEM_LIBS=YES"
+ "-DBUILD_SHARED_LIBS=ON"
+ "-DCMAKE_POSITION_INDEPENDENT_CODE=ON"
+ "-DXNNPACK_LIBRARY_TYPE=shared"
+ "-DXNNPACK_BUILD_BENCHMARKS=FALSE"
+ ;; Tests fail to build with -DXNNPACK_LIBRARY_TYPE=shared:
+ ;; https://github.com/google/XNNPACK/issues/6285
+ "-DXNNPACK_BUILD_TESTS=OFF")
#:tests? #f
#:modules '((ice-9 ftw)
(guix build cmake-build-system)
@@ -4576,14 +4516,18 @@ on quantized 8-bit tensors.")
(string-suffix? ".sh" name))
(let ((file (string-append "scripts/" name)))
(substitute* file
- ;; Turn the commands into targets and remove trailing
- ;; '&' characters
- (("(.*(\\.sh|\\.py|-o |--output)[^&]*)&?[[:space:]]*$" _ command)
+ ;; Turn the commands into targets. Avoid comments and
+ ;; lines starting with - (rest of multilines).
+ (("\
+^[[:space:]]*([^ #-].*/.*(\\.sh|\\.py|-o |--output)[^&]*).*$"
+ _ command)
(begin
(set! counter (+ counter 1))
- (string-append "target" (number->string counter)
+ (string-append "\ntarget" (number->string counter)
":" target-deps
- "\n\t" command "\n")))
+ "\n\t" command)))
+ ;; Remove trailing '&' characters.
+ (("&?[[:space:]]*$") "\n")
(("[[:space:]]*wait[[:space:]]*") "")
;; The commands after this line depend on the
;; previous commands in the file.
@@ -4609,12 +4553,15 @@ on quantized 8-bit tensors.")
(invoke "python3" "tools/generate-lut-norm-test.py"
"--spec" "test/u8-lut32norm.yaml"
"--output" "test/u8-lut32norm.cc")
- (invoke "python3" "tools/generate-gemm-test.py"
- "--spec" "test/qd8-f16-qb4w-gemm-minmax.yaml"
- "--output-test" "test/qd8-f16-qb4w-gemm-minmax.cc")
- (invoke "python3" "tools/generate-gemm-test.py"
- "--spec" "test/qd8-f32-qb4w-gemm-minmax.yaml"
- "--output-test" "test/qd8-f32-qb4w-gemm-minmax.cc"))))))
+ ;; Check existence to avoid doubling the phase for r-torch.
+ (when (file-exists? "test/qd8-f16-qb4w-gemm-minmax.yaml")
+ (invoke "python3" "tools/generate-gemm-test.py"
+ "--spec" "test/qd8-f16-qb4w-gemm-minmax.yaml"
+ "--output-test" "test/qd8-f16-qb4w-gemm-minmax.cc"))
+ (when (file-exists? "test/qd8-f32-qb4w-gemm-minmax.yaml")
+ (invoke "python3" "tools/generate-gemm-test.py"
+ "--spec" "test/qd8-f32-qb4w-gemm-minmax.yaml"
+ "--output-test" "test/qd8-f32-qb4w-gemm-minmax.cc")))))))
(inputs
(list clog
cpuinfo
@@ -4715,7 +4662,7 @@ TensorFlow.js, PyTorch, and MediaPipe.")
(define-public fbgemm
(package
(name "fbgemm")
- (version "1.0.0")
+ (version "1.2.0")
(source (origin
(method git-fetch)
(uri (git-reference
@@ -4724,7 +4671,7 @@ TensorFlow.js, PyTorch, and MediaPipe.")
(file-name (git-file-name name version))
(sha256
(base32
- "1a5g5f32377fad99xsfggqkwvl7vh5gc1wj77swa06x06lc1qwyw"))
+ "0fjs7179iq5hy6nl4nyswnmk90fz87zsg14p7i5bk2vbd2vrq8a3"))
(patches (search-patches "fbgemm-use-system-libraries.patch"))))
(build-system cmake-build-system)
(arguments
@@ -4934,7 +4881,7 @@ PyTorch.")
(base32
"0hdpkhcjry22fjx2zg2r48v7f4ljrclzj0li2pgk76kvyblfbyvm"))))))
-(define %python-pytorch-version "2.7.0")
+(define %python-pytorch-version "2.8.0")
(define %python-pytorch-src
(origin
@@ -4945,7 +4892,7 @@ PyTorch.")
(file-name (git-file-name "python-pytorch" %python-pytorch-version))
(sha256
(base32
- "19prdpzx34n8y2q6wx9dn9vyms6zidjvfgh58d28rfcf5z7z5ra5"))
+ "0am8mx0mq3hqsk1g99a04a4fdf865g93568qr1f247pl11r2jldl"))
(patches (search-patches "python-pytorch-system-libraries.patch"
"python-pytorch-runpath.patch"
"python-pytorch-without-kineto.patch"
@@ -4989,8 +4936,9 @@ PyTorch.")
(for-each
delete-file
(find-files dir "\\.cu$")))
- '("aten/src/ATen/native/transformers/cuda/flash_attn/kernels"
- "aten/src/ATen/native/transformers/cuda/mem_eff_attention/kernels"))))))
+ '("aten/src/ATen/native/transformers/cuda/flash_attn"
+ "aten/src/ATen/native/transformers/cuda/mem_eff_attention"
+ "aten/src/ATen/native/transformers/hip/flash_attn"))))))
(define-public qnnpack-pytorch
(package
@@ -5072,7 +5020,7 @@ PyTorch.")
#:phases
#~(modify-phases %standard-phases
(add-after 'unpack 'cmake-patches
- (lambda _
+ (lambda* (#:key inputs #:allow-other-keys)
(substitute* "cmake/Dependencies.cmake"
(("#POCKETFFT_INCLUDE_DIR")
(string-append
@@ -5080,6 +5028,9 @@ PyTorch.")
(("#FP16_INCLUDE_DIR")
(string-append
#$(this-package-input "fp16") "/include"))
+ (("#CONCURRENTQUEUE_INCLUDE_DIR")
+ (dirname (search-input-file inputs
+ "include/concurrentqueue/concurrentqueue.h")))
;; Disable opentelemetry
((".*(add_library|target_include_directories).*opentelemetry.*")
""))
@@ -5113,6 +5064,12 @@ PyTorch.")
"caffe2/serialize/inline_container.cc"
"torch/csrc/inductor/aoti_package/model_package_loader.cpp"))
+ ;; Fix moodycamel/concurrentqueue includes for system package
+ (substitute* '("c10/util/Semaphore.h"
+ "c10/test/util/Semaphore_test.cpp")
+ (("<moodycamel/concurrentqueue\\.h>") "<concurrentqueue.h>")
+ (("<moodycamel/lightweightsemaphore\\.h>") "<lightweightsemaphore.h>"))
+
(substitute* "aten/src/ATen/native/vulkan/api/Allocator.h"
(("<include/vk_mem_alloc.h>")
"<vk_mem_alloc.h>"))
@@ -5151,14 +5108,14 @@ PyTorch.")
(package-transitive-supported-systems qnnpack)))
(setenv "USE_QNNPACK" "0"))
(substitute* '("requirements.txt" "setup.py")
- (("sympy==1\\.13\\.1")
+ (("sympy>=1\\.13\\.3")
"sympy>=1.13.1"))))
(add-after 'use-system-libraries 'skip-nccl-call
(lambda _
;; Comment-out `checkout_nccl()` invokation in build_pytorch().
(substitute* "tools/build_pytorch_libs.py"
(("^[[:blank:]]*checkout_nccl\\(\\)" all)
- (string-append "# " all " # Guix: use system NCCL\n")))))
+ (string-append "# " all "\n pass")))))
;; PyTorch is still built with AVX2 and AVX-512 support selected at
;; runtime, but these dependencies require it (nnpack only for
;; x86_64).
@@ -5279,6 +5236,7 @@ PyTorch.")
(list asmjit
brotli ; for cpp-httplib
clog
+ concurrentqueue
cpp-httplib
eigen
flatbuffers
@@ -5300,6 +5258,7 @@ PyTorch.")
pybind11
;; qnnpack
qnnpack-pytorch
+ rdma-core
sleef
tensorpipe
vulkan-headers
@@ -5520,7 +5479,8 @@ Note: currently this package does not provide GPU support.")
python-tqdm
python-xxhash))
(native-inputs
- (list python-flit-core-next
+ (list openssl
+ python-flit-core-next
python-pytest
python-pytest-cov))
(home-page "https://pyg.org")
@@ -5589,7 +5549,7 @@ Actions for the Lightning suite of libraries.")
(define-public python-captum
(package
(name "python-captum")
- (version "0.7.0")
+ (version "0.8.0")
(source (origin
(method git-fetch)
(uri (git-reference
@@ -5598,35 +5558,32 @@ Actions for the Lightning suite of libraries.")
(file-name (git-file-name name version))
(sha256
(base32
- "0bgfwnlsi50hbmknn7qljiy93fi6ggwz3k7yk9kj7s37mhzaylym"))))
+ "066sal7hzpk9gsb6pk61sa9x01ckjbjb2mc8c69nc7aghqqrpqjs"))))
(build-system pyproject-build-system)
(arguments
(list
#:test-flags
- '(list "-k" (string-append
- ;; These two tests (out of more than 1000 tests) fail because of
- ;; accuracy problems.
- "not test_softmax_classification_batch_multi_target"
- " and not test_softmax_classification_batch_zero_baseline"
- ;; This test fails with PyTorch 2.7.0 due to stricter
- ;; torch.load weights_only behavior.
- " and not test_exp_sets_with_diffent_lengths"))))
+ #~(list "-k" (string-append
+ ;; These two tests (out of more than 1000 tests) fail
+ ;; because of accuracy problems.
+ "not test_softmax_classification_batch_multi_target"
+ " and not test_softmax_classification_batch_zero_baseline"
+ ;; This test fails with PyTorch 2.7.0 due to stricter
+ ;; torch.load weights_only behavior.
+ " and not test_exp_sets_with_diffent_lengths")
+ "tests")))
+ (native-inputs
+ (list python-flask
+ python-pytest
+ python-flask-compress
+ python-parameterized
+ python-scikit-learn
+ python-setuptools))
(propagated-inputs
- (list python-matplotlib python-numpy python-pytorch python-tqdm))
- (native-inputs (list jupyter
- python-annoy
- python-black
- python-flake8
- python-flask
- python-flask-compress
- python-ipython
- python-ipywidgets
- python-mypy
- python-parameterized
- python-pytest
- python-pytest-cov
- python-scikit-learn
- python-setuptools))
+ (list python-matplotlib
+ python-numpy
+ python-pytorch
+ python-tqdm))
(home-page "https://captum.ai")
(synopsis "Model interpretability for PyTorch")
(description "Captum is a model interpretability and understanding library
@@ -5844,7 +5801,7 @@ implementations and an easy-to-use API to create custom metrics. It offers:
(define-public python-torchvision
(package
(name "python-torchvision")
- (version "0.22.0")
+ (version "0.23.0")
(source (origin
(method git-fetch)
(uri (git-reference
@@ -5854,7 +5811,7 @@ implementations and an easy-to-use API to create custom metrics. It offers:
(file-name (git-file-name name version))
(sha256
(base32
- "0hz6v8796vq8kinafzyq2v2wir5s3hykfn0rnlwx7qcsz62i3ggv"))
+ "1d09xwblldgzmzfdlrsyx6mgv939z4yi1hqanm9yx63cs2mr7w85"))
(modules '((guix build utils)))
(snippet
'(begin
@@ -5949,158 +5906,6 @@ definite approximations of Optimal Transport (Wasserstein) distances.
@end itemize")
(license license:expat)))
-(define-public rust-esaxx-rs-0.1
- (package
- (name "rust-esaxx-rs")
- (version "0.1.10")
- (source
- (origin
- (method url-fetch)
- (uri (crate-uri "esaxx-rs" version))
- (file-name (string-append name "-" version ".tar.gz"))
- (sha256
- (base32 "1rm6vm5yr7s3n5ly7k9x9j6ra5p2l2ld151gnaya8x03qcwf05yq"))))
- (build-system cargo-build-system)
- (arguments
- `(#:cargo-inputs (("rust-cc" ,rust-cc-1))))
- (home-page "https://github.com/Narsil/esaxx-rs")
- (synopsis "Wrapper for sentencepiece's esaxxx library")
- (description
- "This package provides a wrapper around sentencepiece's esaxxx library.")
- (license license:asl2.0)))
-
-(define-public rust-spm-precompiled-0.1
- (package
- (name "rust-spm-precompiled")
- (version "0.1.4")
- (source
- (origin
- (method url-fetch)
- (uri (crate-uri "spm_precompiled" version))
- (file-name (string-append name "-" version ".tar.gz"))
- (sha256
- (base32 "09pkdk2abr8xf4pb9kq3rk80dgziq6vzfk7aywv3diik82f6jlaq"))))
- (build-system cargo-build-system)
- (arguments
- `(#:cargo-inputs
- (("rust-base64" ,rust-base64-0.13)
- ("rust-nom" ,rust-nom-7)
- ("rust-serde" ,rust-serde-1)
- ("rust-unicode-segmentation" ,rust-unicode-segmentation-1))))
- (home-page "https://github.com/huggingface/spm_precompiled")
- (synopsis "Emulate sentencepiece's DoubleArray")
- (description
- "This crate aims to emulate
-@url{https://github.com/google/sentencepiece,sentencepiece}
-Dart::@code{DoubleArray} struct and it's Normalizer. This crate is highly
-specialized and not intended for general use.")
- (license license:asl2.0)))
-
-(define-public rust-hf-hub-0.3
- (package
- (name "rust-hf-hub")
- (version "0.3.2")
- (source
- (origin
- (method url-fetch)
- (uri (crate-uri "hf-hub" version))
- (file-name (string-append name "-" version ".tar.gz"))
- (sha256
- (base32 "0cnpivy9fn62lm1fw85kmg3ryvrx8drq63c96vq94gabawshcy1b"))))
- (build-system cargo-build-system)
- (arguments
- `(#:tests? #f ; require network connection
- #:cargo-inputs
- (("rust-dirs" ,rust-dirs-5)
- ("rust-futures" ,rust-futures-0.3)
- ("rust-indicatif" ,rust-indicatif-0.17)
- ("rust-log" ,rust-log-0.4)
- ("rust-native-tls" ,rust-native-tls-0.2)
- ("rust-num-cpus" ,rust-num-cpus-1)
- ("rust-rand" ,rust-rand-0.8)
- ("rust-reqwest" ,rust-reqwest-0.11)
- ("rust-serde" ,rust-serde-1)
- ("rust-serde-json" ,rust-serde-json-1)
- ("rust-thiserror" ,rust-thiserror-1)
- ("rust-tokio" ,rust-tokio-1)
- ("rust-ureq" ,rust-ureq-2))
- #:cargo-development-inputs
- (("rust-hex-literal" ,rust-hex-literal-0.4)
- ("rust-sha2" ,rust-sha2-0.10)
- ("rust-tokio-test" ,rust-tokio-test-0.4))))
- (native-inputs
- (list pkg-config))
- (inputs
- (list openssl))
- (home-page "https://github.com/huggingface/hf-hub")
- (synopsis "Interact with HuggingFace in Rust")
- (description
- "This crates aims ease the interaction with
-@url{https://huggingface.co/,huggingface}. It aims to be compatible with
-@url{https://github.com/huggingface/huggingface_hub/,huggingface_hub}
-python package, but only implements a smaller subset of functions.")
- (license license:asl2.0)))
-
-(define-public rust-tokenizers
- (package
- (name "rust-tokenizers")
- (version "0.19.1")
- (source
- (origin
- (method url-fetch)
- (uri (crate-uri "tokenizers" version))
- (file-name (string-append name "-" version ".tar.gz"))
- (sha256
- (base32 "1zg6ffpllygijb5bh227m9p4lrhf0pjkysky68kddwrsvp8zl075"))
- (modules '((guix build utils)))
- (snippet
- #~(substitute* "Cargo.toml"
- (("0.1.12") ; rust-monostate requires a rust-syn-2 update
- "0.1.11")
- (("version = \"6.4\"") ; rust-onig
- "version = \"6.1.1\"")))))
- (build-system cargo-build-system)
- (arguments
- (list
- #:tests? #f ; tests are relying on missing data.
- #:cargo-inputs
- `(("rust-aho-corasick" ,rust-aho-corasick-1)
- ("rust-derive-builder" ,rust-derive-builder-0.20)
- ("rust-esaxx-rs" ,rust-esaxx-rs-0.1)
- ("rust-fancy-regex" ,rust-fancy-regex-0.13)
- ("rust-getrandom" ,rust-getrandom-0.2)
- ("rust-hf-hub" ,rust-hf-hub-0.3)
- ("rust-indicatif" ,rust-indicatif-0.17)
- ("rust-itertools" ,rust-itertools-0.12)
- ("rust-lazy-static" ,rust-lazy-static-1)
- ("rust-log" ,rust-log-0.4)
- ("rust-macro-rules-attribute" ,rust-macro-rules-attribute-0.2)
- ("rust-monostate" ,rust-monostate-0.1)
- ("rust-onig" ,rust-onig-6)
- ("rust-paste" ,rust-paste-1)
- ("rust-rand" ,rust-rand-0.8)
- ("rust-rayon" ,rust-rayon-1)
- ("rust-rayon-cond" ,rust-rayon-cond-0.3)
- ("rust-regex" ,rust-regex-1)
- ("rust-regex-syntax" ,rust-regex-syntax-0.8)
- ("rust-serde" ,rust-serde-1)
- ("rust-serde-json" ,rust-serde-json-1)
- ("rust-spm-precompiled" ,rust-spm-precompiled-0.1)
- ("rust-thiserror" ,rust-thiserror-1)
- ("rust-unicode-normalization-alignments" ,rust-unicode-normalization-alignments-0.1)
- ("rust-unicode-segmentation" ,rust-unicode-segmentation-1)
- ("rust-unicode-categories" ,rust-unicode-categories-0.1))
- #:cargo-development-inputs
- `(("rust-assert-approx-eq" ,rust-assert-approx-eq-1)
- ("rust-criterion" ,rust-criterion-0.5)
- ("rust-tempfile" ,rust-tempfile-3))))
- (home-page "https://github.com/huggingface/tokenizers")
- (synopsis "Implementation of various popular tokenizers")
- (description
- "This package provides a Rust implementation of today's most used
-tokenizers, with a focus on performances and versatility.")
- (license license:asl2.0)))
-
(define-public python-tokenizers
(package
(name "python-tokenizers")
@@ -6123,49 +5928,22 @@ tokenizers, with a focus on performances and versatility.")
(unless (member file '("." ".."))
(rename-file (string-append "bindings/python/" file) file)))
(scandir "bindings/python"))
- (delete-file-recursively ".cargo")))))
+ (delete-file-recursively ".cargo")
+ (substitute* "Cargo.toml"
+ (("^path = .*")
+ (format #f "version = ~s~%" #$version)))))))
(build-system cargo-build-system)
(arguments
(list
+ #:install-source? #f
#:cargo-test-flags ''("--no-default-features")
#:imported-modules `(,@%cargo-build-system-modules
,@%pyproject-build-system-modules)
#:modules '((guix build cargo-build-system)
((guix build pyproject-build-system) #:prefix py:)
- (guix build utils)
- (ice-9 regex)
- (ice-9 textual-ports))
+ (guix build utils))
#:phases
#~(modify-phases %standard-phases
- (add-after 'unpack-rust-crates 'inject-tokenizers
- (lambda _
- (substitute* "Cargo.toml"
- (("\\[dependencies\\]")
- (format #f "
-[dev-dependencies]
-tempfile = ~s
-pyo3 = { version = ~s, features = [\"auto-initialize\"] }
-
-[dependencies]
-tokenizers = ~s"
- #$(package-version rust-tempfile-3)
- #$(package-version rust-pyo3-0.21)
- #$(package-version rust-tokenizers))))
- (let ((file-path "Cargo.toml"))
- (call-with-input-file file-path
- (lambda (port)
- (let* ((content (get-string-all port))
- (top-match (string-match
- "\\[dependencies.tokenizers" content)))
- (call-with-output-file file-path
- (lambda (out)
- (format out "~a" (match:prefix top-match))))))))))
- (add-after 'patch-cargo-checksums 'loosen-requirements
- (lambda _
- (substitute* "Cargo.toml"
- (("version = \"6.4\"")
- (format #f "version = ~s"
- #$(package-version rust-onig-6))))))
(add-after 'check 'python-check
(lambda _
(copy-file "target/release/libtokenizers.so"
@@ -6188,28 +5966,16 @@ tokenizers = ~s"
(copy-file "PKG-INFO" (string-append info "/METADATA"))
(copy-recursively
"py_src/tokenizers"
- (string-append lib "tokenizers"))))))
- #:cargo-inputs
- `(("rust-rayon" ,rust-rayon-1)
- ("rust-serde" ,rust-serde-1)
- ("rust-serde-json" ,rust-serde-json-1)
- ("rust-libc" ,rust-libc-0.2)
- ("rust-env-logger" ,rust-env-logger-0.11)
- ("rust-pyo3" ,rust-pyo3-0.21)
- ("rust-numpy" ,rust-numpy-0.21)
- ("rust-ndarray" ,rust-ndarray-0.15)
- ("rust-onig" ,rust-onig-6)
- ("rust-itertools" ,rust-itertools-0.12)
- ("rust-tokenizers" ,rust-tokenizers))
- #:cargo-development-inputs
- `(("rust-tempfile" ,rust-tempfile-3))))
+ (string-append lib "tokenizers"))))))))
(native-inputs
- (list python-minimal python-pytest))
+ (list pkg-config python-minimal python-pytest))
+ (inputs
+ (cons oniguruma (cargo-inputs 'python-tokenizers)))
(home-page "https://huggingface.co/docs/tokenizers")
(synopsis "Implementation of various popular tokenizers")
(description
- "This package provides bindings to a Rust implementation of the most used
-tokenizers, @code{rust-tokenizers}.")
+ "This package provides an implementation of today’s most used tokenizers,
+with a focus on performance and versatility.")
(license license:asl2.0)))
(define-public python-transformers