diff options
Diffstat (limited to 'gnu/packages/machine-learning.scm')
-rw-r--r-- | gnu/packages/machine-learning.scm | 316 |
1 files changed, 46 insertions, 270 deletions
diff --git a/gnu/packages/machine-learning.scm b/gnu/packages/machine-learning.scm index 3d29913703..2b6955b406 100644 --- a/gnu/packages/machine-learning.scm +++ b/gnu/packages/machine-learning.scm @@ -683,7 +683,7 @@ Performance is achieved by using the LLVM JIT compiler.") (deprecated-package "guile-aiscm-next" guile-aiscm)) (define-public llama-cpp - (let ((tag "b5013")) + (let ((tag "b6056")) (package (name "llama-cpp") (version (string-append "0.0.0-" tag)) @@ -695,7 +695,7 @@ Performance is achieved by using the LLVM JIT compiler.") (commit tag))) (file-name (git-file-name name tag)) (sha256 - (base32 "0s73dz871x53dr366lkzq19f677bwgma2ri8m5vhbfa9p8yp4p3r")))) + (base32 "1y9blrd7c8snazjmjkzj0148v328pigncvf1l9g1ih735b67zpd0")))) (build-system cmake-build-system) (arguments (list @@ -730,11 +730,23 @@ Performance is achieved by using the LLVM JIT compiler.") #~(modify-phases %standard-phases (add-after 'unpack 'patch-paths (lambda* (#:key inputs #:allow-other-keys) - (substitute* "ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp" - (("\"/bin/sh\"") - (string-append "\"" (search-input-file inputs "/bin/sh") "\""))))) + (substitute* (format #f "~a~a" + "ggml/src/ggml-vulkan/vulkan-shaders/" + "vulkan-shaders-gen.cpp") + (("\"/bin/sh\"") + (string-append "\"" (search-input-file inputs "/bin/sh") + "\""))))) (add-after 'unpack 'fix-tests (lambda _ + ;; test-thread-safety downloads ML model from network, + ;; cannot run in Guix build environment + (substitute* '("tests/CMakeLists.txt") + (("llama_build_and_test\\(test-thread-safety.cpp.*") + "") + ;; error while handling argument "-m": expected value for + ;; argument + (("llama_build_and_test\\(test-arg-parser.cpp.*") + "")) ;; test-eval-callback downloads ML model from network, cannot ;; run in Guix build environment (substitute* '("examples/eval-callback/CMakeLists.txt") @@ -751,11 +763,13 @@ Performance is achieved by using the LLVM JIT compiler.") (string-append (assoc-ref outputs "out") "/bin") "^test-"))))))) - (inputs (list curl glslang python python-gguf - vulkan-headers vulkan-loader)) - (native-inputs (list pkg-config shaderc bash-minimal)) + (inputs + (list curl glslang python-gguf python-minimal openblas spirv-headers + spirv-tools vulkan-headers vulkan-loader)) + (native-inputs + (list bash-minimal pkg-config shaderc)) (propagated-inputs - (list python-numpy python-pytorch python-sentencepiece openblas)) + (list python-numpy python-pytorch python-sentencepiece)) (properties '((tunable? . #true))) ;use AVX512, FMA, etc. when available (home-page "https://github.com/ggml-org/llama.cpp") (synopsis "Port of Facebook's LLaMA model in C/C++") @@ -800,9 +814,15 @@ independently to be able to run a LLaMA model.") "-DGGML_AVX2=OFF" "-DGGML_AVX512=OFF" "-DGGML_AVX512_VBMI=OFF" - "-DGGML_AVX512_VNNI=OFF") + "-DGGML_AVX512_VNNI=OFF" + "-DGGML_VULKAN=ON") #:phases #~(modify-phases %standard-phases + (add-after 'unpack 'patch-paths + (lambda* (#:key inputs #:allow-other-keys) + (substitute* "ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp" + (("\"/bin/sh\"") + (string-append "\"" (search-input-file inputs "/bin/sh") "\""))))) #$@(if (not (target-64bit?)) '((add-after 'unpack 'skip-failing-tests (lambda _ @@ -823,9 +843,10 @@ independently to be able to run a LLaMA model.") (("\\$\\{VAD_TARGET\\} PROPERTIES LABELS \"base;en\"") "${VAD_TEST} PROPERTIES DISABLED true"))))))) (native-inputs - (list pkg-config)) + (list pkg-config shaderc)) (inputs - (list openblas sdl2 git)) + (list openblas sdl2 git spirv-headers spirv-tools + vulkan-headers vulkan-loader)) (synopsis "OpenAI's Whisper model in C/C++") (description "This package is a high-performance inference of OpenAI's @@ -843,7 +864,7 @@ without dependencies, with @item C-style API @end itemize") (properties '((tunable? . #true))) ;use AVX512, FMA, etc. when available - (home-page "https://github.com/ggerganov/whisper.cpp") + (home-page "https://github.com/ggml-org/whisper.cpp/") (license license:expat))) (define-public mcl @@ -2268,50 +2289,6 @@ number of threads used in the threadpool-backed of common native libraries used for scientific computing and data science (e.g. BLAS and OpenMP).") (license license:bsd-3))) -(define-public python-imbalanced-learn - (package - (name "python-imbalanced-learn") - (version "0.12.2") - (source - (origin - (method url-fetch) - (uri (pypi-uri "imbalanced-learn" version)) - (sha256 - (base32 "1hgncab4g4xry7yl6wwsj1wmfnxbsajx6qmycvr28wdhvk75c358")))) - (build-system pyproject-build-system) - (arguments - (list - #:test-flags '(list "-k" - ;; Although we cannot satify the Tensorflow and Keras requirements - ;; (python-keras >= 2.4.3 and tensorflow >= 2.4.3), all tests - ;; besides these pass. - "not balanced_batch_generator and not BalancedBatchGenerator") - #:phases '(modify-phases %standard-phases - (add-after 'unpack 'unbreak-tests - (lambda _ - ;; Some tests require a home directory - (setenv "HOME" - (getcwd))))))) - (propagated-inputs (list python-joblib python-numpy python-scikit-learn - python-scipy python-threadpoolctl)) - (native-inputs (list python-black - python-flake8 - python-keras - python-mypy - python-pandas - python-pytest - python-pytest-cov - python-setuptools - python-wheel - tensorflow)) - (home-page "https://github.com/scikit-learn-contrib/imbalanced-learn") - (synopsis "Toolbox for imbalanced dataset in machine learning") - (description - "This is a Python package offering a number of re-sampling -techniques commonly used in datasets showing strong between-class imbalance. -It is compatible with @code{scikit-learn}.") - (license license:expat))) - (define-public python-hdbscan (package (name "python-hdbscan") @@ -4127,213 +4104,6 @@ any function). It currently contains the interface and IO code from the Shap project, and it will potentially also do the same for the Lime project.") (license license:expat))) -(define-public python-keras-applications - (package - (name "python-keras-applications") - (version "1.0.8") - (source - (origin - (method url-fetch) - (uri (pypi-uri "Keras_Applications" version)) - (sha256 - (base32 - "1rcz31ca4axa6kzhjx4lwqxbg4wvlljkj8qj9a7p9sfd5fhzjyam")))) - (build-system python-build-system) - ;; The tests require Keras, but this package is needed to build Keras. - (arguments '(#:tests? #f)) - (propagated-inputs - (list python-h5py python-numpy)) - (native-inputs - (list python-pytest python-pytest-cov - python-pytest-xdist)) - (home-page "https://github.com/keras-team/keras-applications") - (synopsis "Reference implementations of popular deep learning models") - (description - "This package provides reference implementations of popular deep learning -models for use with the Keras deep learning framework.") - (license license:expat))) - -(define-public python-keras-preprocessing - (package - (name "python-keras-preprocessing") - (version "1.1.0") - (source - (origin - (method url-fetch) - (uri (pypi-uri "Keras_Preprocessing" version)) - (sha256 - (base32 - "1r98nm4k1svsqjyaqkfk23i31bl1kcfcyp7094yyj3c43phfp3as")))) - (build-system python-build-system) - (propagated-inputs - (list python-numpy python-six)) - (native-inputs - (list python-pandas - python-pillow - python-pytest - python-pytest-cov - python-pytest-xdist - tensorflow)) - (home-page "https://github.com/keras-team/keras-preprocessing/") - (synopsis "Data preprocessing and augmentation for deep learning models") - (description - "Keras Preprocessing is the data preprocessing and data augmentation -module of the Keras deep learning library. It provides utilities for working -with image data, text data, and sequence data.") - (license license:expat))) - -(define-public python-keras - (package - (name "python-keras") - (version "2.3.1") - (source - (origin - (method url-fetch) - (uri (pypi-uri "Keras" version)) - (sha256 - (base32 - "1k68xd8n2y9ldijggjc8nn4d6d1axw0p98gfb0fmm8h641vl679j")) - (modules '((guix build utils))) - (snippet - '(substitute* '("keras/callbacks/callbacks.py" - "keras/engine/training_utils.py" - "keras/engine/training.py" - "keras/engine/training_generator.py" - "keras/utils/generic_utils.py") - (("from collections import Iterable") - "from collections.abc import Iterable") - (("collections.Container") - "collections.abc.Container") - (("collections.Mapping") - "collections.abc.Mapping") - (("collections.Sequence") - "collections.abc.Sequence"))))) - (build-system python-build-system) - (arguments - `(#:phases - (modify-phases %standard-phases - (add-after 'unpack 'tf-compatibility - (lambda _ - (substitute* "keras/backend/tensorflow_backend.py" - (("^get_graph = .*") - "get_graph = tf.get_default_graph") - (("tf.compat.v1.nn.fused_batch_norm") - "tf.nn.fused_batch_norm") - ;; categorical_crossentropy does not support axis - (("from_logits=from_logits, axis=axis") - "from_logits=from_logits") - ;; dropout accepts a level number, not a named rate argument. - (("dropout\\(x, rate=level,") - "dropout(x, level,") - (("return x.shape.rank") - "return len(x.shape)")))) - (add-after 'unpack 'hdf5-compatibility - (lambda _ - ;; The truth value of an array with more than one element is ambiguous. - (substitute* "tests/keras/utils/io_utils_test.py" - ((" *assert .* == \\[b'(asd|efg).*") "")) - (substitute* "tests/test_model_saving.py" - (("h5py.File\\('does not matter',") - "h5py.File('does not matter', 'w',")) - (substitute* "keras/utils/io_utils.py" - (("h5py.File\\('in-memory-h5py', driver='core', backing_store=False\\)") - "h5py.File('in-memory-h5py', 'w', driver='core', backing_store=False)") - (("h5file.fid.get_file_image") - "h5file.id.get_file_image")) - (substitute* "keras/engine/saving.py" - (("\\.decode\\('utf-?8'\\)") "")))) - (add-after 'unpack 'delete-unavailable-backends - (lambda _ - (delete-file "keras/backend/theano_backend.py") - (delete-file "keras/backend/cntk_backend.py"))) - (replace 'check - (lambda* (#:key tests? #:allow-other-keys) - (when tests? - ;; These tests attempt to download data files from the internet. - (delete-file "tests/integration_tests/test_datasets.py") - (delete-file "tests/integration_tests/imagenet_utils_test.py") - (invoke "python" "-m" "pytest" "tests" - "-p" "no:pep8" - ;; FIXME: python-build-system lacks PARALLEL-TESTS? - "-n" (number->string (parallel-job-count)) - ;; This one uses the theano backend that we don't have. - "--ignore=tests/test_api.py" - "--ignore=tests/keras/backend/backend_test.py" - ;; Our Tensorflow version does not have the coder ops library. - "--ignore=tests/keras/callbacks/callbacks_test.py" - ;; ...nor do we have tensorboard - "--ignore=tests/keras/callbacks/tensorboard_test.py" - "-k" - (string-append - ;; See https://github.com/keras-team/keras/pull/7033 - "not test_TimeDistributed_learning_phase " - ;; XXX fails because no closure is provided - "and not test_func_dump_and_load_backwards_compat " - ;; XXX real bug? These are all tests that fail due to - ;; shape mismatch, e.g. "got logits shape [12,3] and - ;; labels shape [9]" - "and not test_model_with_crossentropy_losses_channels_first " - "and not test_masking_correctness_output_size_not_equal_to_first_state_size " - "and not test_convolutional_recurrent " - "and not test_axis " - - ;; XXX fails because of 3/15 values have unexpected differences. - "and not test_masking_correctness_output_not_equal_to_first_state " - ;; XXX fails because of a difference of about 0.1 - "and not test_sample_weighted " - ;; XXX fails because of a difference of about 0.3 - "and not test_scalar_weighted " - ;; XXX fails because of a difference of about 0.2 - "and not test_unweighted " - - ;; XXX I cannot reproduce this in an interactive - ;; Python session, because l2_norm works just fine. - "and not test_weighted " ;TestCosineSimilarity - "and not test_config " ;TestCosineSimilarity - - ;; The following test fails only in the build - ;; container; skip it. - "and not test_selu " - ;; The following test was found flaky and removed in - ;; recent versions. - "and not test_stateful_metrics")))))))) - (propagated-inputs - (list python-h5py - python-keras-applications - python-keras-preprocessing - python-numpy - python-pydot - python-pyyaml - python-scipy - python-six - tensorflow - graphviz)) - (native-inputs - (list python-flaky - python-markdown - python-pandas - python-pytest - python-pytest-cov - python-pytest-timeout - python-pytest-xdist - python-pyux - python-sphinx - python-requests)) - (home-page "https://keras.io/") - (synopsis "High-level deep learning framework") - (description "Keras is a high-level neural networks API, written in Python -and capable of running on top of TensorFlow. It was developed with a focus on -enabling fast experimentation. Use Keras if you need a deep learning library -that: -@itemize -@item Allows for easy and fast prototyping (through user friendliness, - modularity, and extensibility). -@item Supports both convolutional networks and recurrent networks, as well as - combinations of the two. -@item Runs seamlessly on CPU and GPU. -@end itemize\n") - (license license:expat))) - (define-public gloo (let ((version "0.0.0") ; no proper version tag (commit "81925d1c674c34f0dc34dd9a0f2151c1b6f701eb") @@ -4800,7 +4570,6 @@ TensorFlow.js, PyTorch, and MediaPipe.") (build-system cmake-build-system) (arguments (list - #:cmake cmake-next #:configure-flags ''("-DFBGEMM_LIBRARY_TYPE=shared") ;; Tests require AVX2 or AVX-512 instructions @@ -4869,7 +4638,6 @@ the tensors contained therein.") (build-system cmake-build-system) (arguments (list - #:test-target "cpptest" #:configure-flags #~(list "-DUSE_OPENCL=ON" "-DUSE_VULKAN=ON" @@ -4889,11 +4657,11 @@ the tensors contained therein.") #:phases #~(modify-phases %standard-phases (replace 'check - (lambda* (#:key source test-target tests? #:allow-other-keys) + (lambda* (#:key source tests? #:allow-other-keys) (when tests? (begin (invoke "make" "-j" - (number->string (parallel-job-count)) test-target) + (number->string (parallel-job-count)) "cpptest") ;; Disable below the actual run of the tests because ;; several fail due to platform variations (for example, ;; fp16 tests fail because not supported on CPUs). @@ -6365,7 +6133,6 @@ Jax, PyTorch and TensorFlow — with a seamless integration between them.") (build-system cmake-build-system) (arguments (list - #:test-target "ctranslate2_test" ;; XXX: mkl and openblas seem incompatible. #:configure-flags `(list "-DBUILD_TESTS=ON" "-DWITH_ACCELERATE=OFF" @@ -6374,7 +6141,16 @@ Jax, PyTorch and TensorFlow — with a seamless integration between them.") "-DWITH_CUDA=OFF" "-DWITH_CUDNN=OFF" "-DWITH_MKL=OFF" - "-DWITH_OPENBLAS=ON"))) + "-DWITH_OPENBLAS=ON") + #:modules '((guix build cmake-build-system) + ((guix build gnu-build-system) #:prefix gnu:) + (guix build utils)) + #:phases + #~(modify-phases %standard-phases + (replace 'check + (lambda* (#:rest args) + (apply (assoc-ref gnu:%standard-phases 'check) + #:test-target "ctranslate2_test" args)))))) (native-inputs (list libomp cxxopts spdlog |