diff options
Diffstat (limited to 'gnu/packages/machine-learning.scm')
-rw-r--r-- | gnu/packages/machine-learning.scm | 223 |
1 files changed, 191 insertions, 32 deletions
diff --git a/gnu/packages/machine-learning.scm b/gnu/packages/machine-learning.scm index 34dd73e32e..d673618fec 100644 --- a/gnu/packages/machine-learning.scm +++ b/gnu/packages/machine-learning.scm @@ -17,7 +17,7 @@ ;;; Copyright © 2020 Edouard Klein <edk@beaver-labs.com> ;;; Copyright © 2020-2025 Vinicius Monego <monego@posteo.net> ;;; Copyright © 2020, 2021, 2022, 2023 Maxim Cournoyer <maxim.cournoyer@gmail.com> -;;; Copyright © 2022, 2023, 2024 Nicolas Graves <ngraves@ngraves.fr> +;;; Copyright © 2022-2025 Nicolas Graves <ngraves@ngraves.fr> ;;; Copyright © 2022 Kiran Shila <me@kiranshila.com> ;;; Copyright © 2022 Wiktor Zelazny <wzelazny@vurv.cz> ;;; Copyright © 2023 zamfofex <zamfofex@twdb.moe> @@ -76,6 +76,7 @@ #:use-module (gnu packages boost) #:use-module (gnu packages build-tools) #:use-module (gnu packages c) + #:use-module (gnu packages calendar) #:use-module (gnu packages check) #:use-module (gnu packages cmake) #:use-module (gnu packages compression) @@ -220,6 +221,40 @@ representations and sentence classification.") family of functions.") (license license:expat))) +(define-public python-faster-whisper + (package + (name "python-faster-whisper") + (version "1.1.1") + (source + (origin + (method git-fetch) + (uri (git-reference + (url "https://github.com/SYSTRAN/faster-whisper") + (commit (string-append "v" version)))) + (file-name (git-file-name name version)) + (sha256 + (base32 "0g9cdvphifn4rqhh7p4z1d3pp6bhcx0jmcahjigvcgry0qsijgfn")))) + (build-system pyproject-build-system) + ;; XXX: Currently tests requires model download, which we'd rather avoid + ;; in Guix unless we're sure about the FOSS weights. To test in source : + ;; guix shell -D python-faster-whisper -- pytest + (arguments (list #:tests? #f)) + (propagated-inputs (list (list onnxruntime "python") + python-av + python-ctranslate2 + python-huggingface-hub + python-tokenizers + python-tqdm)) + (native-inputs (list python-numpy + python-pytest + python-setuptools-next)) + (home-page "https://github.com/SYSTRAN/faster-whisper") + (synopsis "Whisper transcription reimplementation") + (description + "This package provides a reimplementation of OpenAI's Whisper model using +CTranslate2, which is a inference engine for transformer models.") + (license license:expat))) + (define-public python-fasttext (package (inherit fasttext) @@ -1501,37 +1536,25 @@ unsupervised text tokenizer.") (define-public python-sentence-transformers (package (name "python-sentence-transformers") - (version "3.0.1") + (version "5.1.0") (source (origin - (method url-fetch) - (uri (pypi-uri "sentence_transformers" version)) + (method git-fetch) + (uri (git-reference + (url "https://github.com/UKPLab/sentence-transformers/") + (commit (string-append "v" version)))) + (file-name (git-file-name name version)) (sha256 - (base32 "1xmzbyrlp6wa7adf42n67c544db17nz95b10ri603lf4gi9jqgca")))) + (base32 + "1jkj77q25b21nxrdszvlw127jnx1m7x8czldiq2mfyj76yjk0ymj")))) (build-system pyproject-build-system) (arguments - (list - #:test-flags - '(list - ;; Missing fixture / train or test data. - ;; Requires internet access. - "--ignore=tests/test_sentence_transformer.py" - "--ignore=tests/test_train_stsb.py" - "--ignore=tests/test_compute_embeddings.py" - "--ignore=tests/test_cross_encoder.py" - "--ignore=tests/test_model_card_data.py" - "--ignore=tests/test_multi_process.py" - "--ignore=tests/test_pretrained_stsb.py" - "-k" (string-append - "not test_LabelAccuracyEvaluator" - " and not test_ParaphraseMiningEvaluator" - " and not test_cmnrl_same_grad" - " and not test_paraphrase_mining" - " and not test_simple_encode")))) + (list #:tests? #f)) ;network access is required (propagated-inputs (list python-huggingface-hub python-numpy python-pillow python-pytorch + python-typing-extensions python-scikit-learn python-scipy python-tqdm @@ -1843,6 +1866,139 @@ aim is to provide all such passes along with ONNX so that they can be re-used with a single function call.") (license license:expat))) +(define-public onnxruntime + (package + (name "onnxruntime") + (version "1.22.0") + (source + (origin + (method git-fetch) + (uri (git-reference + (url "https://github.com/microsoft/onnxruntime") + (commit (string-append "v" version)))) + (file-name (git-file-name name version)) + (sha256 + (base32 "0z2s79l4wdilssw9lmj319ypyyqi2y0dx0fpwr2yhq8bax3ci50n")))) + (build-system cmake-build-system) + (arguments + (list + #:modules '((guix build cmake-build-system) + (guix build utils) + ((guix build pyproject-build-system) #:prefix py:)) + #:imported-modules (append %cmake-build-system-modules + %pyproject-build-system-modules) + #:configure-flags + #~(list "-Donnxruntime_BUILD_UNIT_TESTS=OFF" + "-Donnxruntime_BUILD_SHARED_LIB=ON" + "-Donnxruntime_ENABLE_LTO=ON" + "-Donnxruntime_ENABLE_PYTHON=ON" + "-Donnxruntime_USE_FULL_PROTOBUF=OFF" + ;; XXX: Fixes build with gcc@14. + "-DCMAKE_CXX_FLAGS=-Wl,-z,noexecstack") + #:phases + #~(modify-phases %standard-phases + (add-after 'unpack 'chdir + (lambda _ + (chdir "cmake"))) + (add-after 'unpack 'relax-dependencies + (lambda _ + (with-output-to-file "cmake/external/eigen.cmake" + (lambda _ + (display "find_package(Eigen3 REQUIRED)\n"))) + (substitute* "cmake/external/abseil-cpp.cmake" + (("20240722") + (car (string-split #$(package-version + (this-package-input "abseil-cpp")) + #\.)))))) + (add-after 'install 'build-python + (lambda _ + (invoke "python3" "../setup.py" "bdist_wheel"))) + (add-after 'build-python 'install-python + (lambda* (#:key inputs #:allow-other-keys) + ((assoc-ref py:%standard-phases 'install) + #:inputs inputs + #:outputs `(("out" . ,#$output:python))))) + (add-after 'install-python 'add-install-to-pythonpath + (lambda* (#:key inputs #:allow-other-keys) + ((assoc-ref py:%standard-phases 'add-install-to-pythonpath) + #:inputs inputs + #:outputs `(("out" . ,#$output:python))))) + (delete 'check) + (add-after 'add-install-to-pythonpath 'check + (lambda* (#:key tests? #:allow-other-keys) + (with-directory-excursion "../onnxruntime/test/python" + ((assoc-ref py:%standard-phases 'check) + #:tests? tests? + #:test-flags + `(;; XXX: NotImplementedError + "--ignore-glob=quantization/*" + ;; XXX: These tests require transformer models or have + ;; import issues. + "--ignore=transformers/test_generation.py" + "--ignore=transformers/test_gpt2_benchmark.py" + "--ignore=transformers/test_gpt2_to_onnx.py" + "--ignore=transformers/test_optimizer_huggingface_bert.py" + "--ignore=transformers/test_parity_huggingface_gpt_attention.py" + "--ignore=transformers/test_shape_infer_helper.py" + ;; XXX: onnxscript ModuleNotFound + "--ignore=transformers/test_gelu_fusions.py" + "--ignore=transformers/test_gemma3_vision.py" + ;; XXX: Other failing tests. + "-k" ,(string-append + "not test_gelu_is_fused_by_default" + " and not test_inverse")))))) + (add-after 'check 'python-sanity-check + (lambda* (#:key tests? inputs #:allow-other-keys) + ((assoc-ref py:%standard-phases 'sanity-check) + #:inputs `(("sanity-check.py" . ,#$(default-sanity-check.py)) + ,@inputs) + #:outputs `(("out" . ,#$output:python)))))))) + (outputs (list "out" "python")) + (inputs + (list abseil-cpp + boost + cpuinfo + dlpack + c++-gsl + date + eigen-for-onnxruntime + flatbuffers-23.5 + googletest + nlohmann-json + onnx + protobuf + pybind11 + re2-next + safeint + zlib)) + (native-inputs + (list pkg-config + python-einops + python-wrapper + python-numpy + python-parameterized + python-psutil + python-pytest + python-pytorch + python-sentencepiece + python-setuptools-next)) + (propagated-inputs + (list python-coloredlogs + python-flatbuffers + python-protobuf + python-sympy)) + (home-page "https://github.com/microsoft/onnxruntime") + (synopsis "Cross-platform, high performance scoring engine for ML models") + (description + "ONNX Runtime is a performance-focused complete scoring engine +for Open Neural Network Exchange (ONNX) models, with an open +extensible architecture to continually address the latest developments +in AI and Deep Learning. ONNX Runtime stays up to date with the ONNX +standard with complete implementation of all ONNX operators, and +supports all ONNX releases (1.2+) with both future and backwards +compatibility.") + (license license:expat))) + (define-public rxcpp (package (name "rxcpp") @@ -6669,22 +6825,25 @@ simple speech recognition.") (origin (method git-fetch) (uri (git-reference - (url "https://github.com/ideasman42/nerd-dictation") - (commit commit))) + (url "https://github.com/ideasman42/nerd-dictation") + (commit commit))) (file-name (git-file-name name version)) (sha256 (base32 "0frdpswv6w3cwj3c7wd5w8gj3s1hvpdwd48qhfhfxf7imahz9bqf")))) - (build-system python-build-system) + (build-system pyproject-build-system) (arguments - '(#:phases - (modify-phases %standard-phases - (add-after 'unpack 'chdir - (lambda _ (chdir "package/python")))))) + (list + #:phases + #~(modify-phases %standard-phases + (add-after 'unpack 'chdir + (lambda _ + (chdir "package/python")))))) + (native-inputs (list python-setuptools python-wheel)) (propagated-inputs (list python-vosk)) (home-page "https://github.com/ideasman42/nerd-dictation") (synopsis "Offline speech-to-text for desktop Linux") - (description "\ -This package provides simple access speech to text for using in + (description + "This package provides simple access speech to text for using in Linux without being tied to a desktop environment, using the @code{vosk-api}. The user configuration lets you manipulate text using Python string operations. It has zero overhead, as this relies on manual activation and |