summaryrefslogtreecommitdiff
path: root/gnu/packages/python-science.scm
diff options
context:
space:
mode:
author Sharlatan Hellseher <sharlatanus@gmail.com> 2025-07-24 15:38:44 +0100
committer Sharlatan Hellseher <sharlatanus@gmail.com> 2025-07-24 15:52:13 +0100
commit 18629b3d6939fb7e04d0420b3378a16fb3790558 (patch)
tree 62c010dab11fac0cae7649996051c2b836fa4dc2 /gnu/packages/python-science.scm
parent 068654e0e9428634f6f9cb26b0868543c23e30ad (diff)
gnu: python-dask: Move to python-science.
* gnu/packages/python-xyz.scm (python-dask, python-dask/bootstrap): Move from here ...
* gnu/packages/python-science.scm: ... to here.

Change-Id: I6bedc89bd09b3f2dfc3f7cbe27eb5790f32f6af7
Diffstat (limited to 'gnu/packages/python-science.scm')
-rw-r--r-- gnu/packages/python-science.scm 125
1 files changed, 125 insertions, 0 deletions
diff --git a/gnu/packages/python-science.scm b/gnu/packages/python-science.scm
index e0ddeb6e4c..dbbd44db70 100644
--- a/gnu/packages/python-science.scm
+++ b/gnu/packages/python-science.scm
@@ -702,6 +702,131 @@ it can be used for displaying many qualitatively different samples.")
optimization problems in Python.")
(license license:asl2.0)))
+;; Note: Remember to update python-distributed when updating dask.
+(define-public python-dask
+ (package
+ (name "python-dask")
+ (version "2024.12.1")
+ (source
+ (origin
+ (method git-fetch)
+ (uri (git-reference
+ (url "https://github.com/dask/dask/")
+ (commit version)))
+ (file-name (git-file-name name version))
+ (sha256
+ (base32 "17iqfyjphyn72xdr8fmynzvixskbq16pwmsknwc6anq7s2axvas2"))))
+ (build-system pyproject-build-system)
+ (arguments
+ (list
+ ;; Run the tests in parallel and deselect those that cannot pass here.
+ #:test-flags
+ #~(list "--numprocesses" (number->string (parallel-job-count))
+ "-m" "not gpu and not slow and not network"
+ ;; The tokenize tests all fail with different hashes. This
+ ;; doesn't seem problematic.
+ "--ignore-glob=**/test_tokenize.py"
+ ;; ORC tests crash Python with a failure to find the global
+ ;; localtime file. See also
+ ;; https://github.com/apache/arrow/issues/40633.
+ "--ignore-glob=**/test_orc.py"
+ "-k" (string-append
+ ;; This one cannot be interrupted.
+ "not test_interrupt"
+ ;; This one fails with "local variable 'ctx' referenced
+ ;; before assignment". Maybe enable this in later
+ ;; versions (or when pandas has been upgraded).
+ " and not test_dt_accessor"
+ ;; This fails when dask-expr is among the inputs.
+ " and not test_groupby_internal_repr"
+ ;; This fails with different job ids.
+ " and not test_to_delayed_optimize_graph"
+ ;; This one expects a deprecation warning that never
+ ;; comes.
+ " and not test_RandomState_only_funcs"
+ ;; This test expects a RuntimeWarning that is never
+ ;; raised.
+ " and not test_nanquantile_all_nan")
+ ;; Tests must run from the output directory, because otherwise
+ ;; it complains about the difference between the target
+ ;; directory embedded in the pyc files and the source directory
+ ;; from which we run tests.
+ (getcwd))
+ #:phases
+ #~(modify-phases %standard-phases
+ (add-after 'unpack 'versioneer
+ (lambda _
+ ;; Our version of versioneer needs setup.cfg. This is adapted
+ ;; from pyproject.toml.
+ (with-output-to-file "setup.cfg"
+ (lambda ()
+ (display "\
+[versioneer]
+VCS = git
+style = pep440
+versionfile_source = dask/_version.py
+versionfile_build = dask/_version.py
+tag_prefix =
+parentdir_prefix = dask-
+")))
+ (invoke "versioneer" "install")
+ (substitute* "setup.py"
+ (("versioneer.get_version\\(\\)")
+ (string-append "\"" #$version "\"")))))
+ (add-after 'unpack 'fix-pytest-config
+ (lambda _
+ ;; The --cov-config option is not supported by our version of pytest.
+ (substitute* "pyproject.toml"
+ (("--cov-config=pyproject.toml") ""))))
+ (add-after 'unpack 'patch-pyproject
+ (lambda _
+ ;; We use pyarrow > 14, so drop the "pyarrow_hotfix" entry.
+ (substitute* "pyproject.toml"
+ (("\"pyarrow_hotfix\",") ""))))
+ (add-before 'check 'pre-check
+ (lambda _ (chdir "/tmp")))))) ;run the tests from /tmp
+ (propagated-inputs
+ (list python-click ;needed at runtime
+ python-cloudpickle
+ python-dask-expr
+ python-fsspec
+ python-importlib-metadata ;needed at runtime for dask/_compatibility.py
+ python-numpy
+ python-packaging
+ python-pandas
+ python-partd
+ python-toolz
+ python-pyyaml))
+ (native-inputs
+ (list python-importlib-metadata ;also listed in propagated-inputs
+ python-pytest
+ python-pytest-rerunfailures
+ python-pytest-runner
+ python-pytest-xdist
+ python-versioneer
+ python-wheel))
+ (home-page "https://github.com/dask/dask/")
+ (synopsis "Parallel computing with task scheduling")
+ (description
+ "Dask is a flexible parallel computing library for analytics. It
+consists of two components: dynamic task scheduling optimized for computation,
+and large data collections like parallel arrays, dataframes, and lists that
+extend common interfaces like NumPy, Pandas, or Python iterators to
+larger-than-memory or distributed environments. These parallel collections
+run on top of the dynamic task schedulers.")
+ (license license:bsd-3)))
+
+(define-public python-dask/bootstrap
+  (package
+    (inherit python-dask)
+    (propagated-inputs                  ;python-dask's, minus python-dask-expr
+     (modify-inputs (package-propagated-inputs python-dask)
+       (delete "python-dask-expr")))
+    (arguments                          ;python-dask's, with #:tests? set to false
+     (substitute-keyword-arguments (package-arguments python-dask)
+       ((#:tests? _ #true) #false)))
+    (properties '((hidden? . #t)))))
+
(define-public python-dask-expr
(package
(name "python-dask-expr")