diff options
author | Liliana Marie Prikler <liliana.prikler@gmail.com> | 2024-01-07 18:31:32 +0100 |
---|---|---|
committer | Liliana Marie Prikler <liliana.prikler@gmail.com> | 2024-01-07 18:31:32 +0100 |
commit | 2cbb1a6e1e2679c7db74b6b906d223fb3661d09f (patch) | |
tree | ab4101e769bbac1d53950004044fd297223b9492 /gnu/packages/bioinformatics.scm | |
parent | b369dc272eeba4eafb2bc7dc56f7b3cf09fb0961 (diff) | |
parent | b212e6934643e085f168a5364cb593f61aa616ba (diff) |
Merge branch 'master' into gnome-team
Diffstat (limited to 'gnu/packages/bioinformatics.scm')
-rw-r--r-- | gnu/packages/bioinformatics.scm | 416 |
1 files changed, 398 insertions, 18 deletions
diff --git a/gnu/packages/bioinformatics.scm b/gnu/packages/bioinformatics.scm index 2e4bc876d4..7dbebcf3da 100644 --- a/gnu/packages/bioinformatics.scm +++ b/gnu/packages/bioinformatics.scm @@ -130,6 +130,7 @@ #:use-module (gnu packages pdf) #:use-module (gnu packages perl) #:use-module (gnu packages perl-check) + #:use-module (gnu packages perl-web) #:use-module (gnu packages pkg-config) #:use-module (gnu packages popt) #:use-module (gnu packages protobuf) @@ -574,6 +575,30 @@ BED, GFF/GTF, VCF.") whole-genome bisulfite sequencing (WGBS) reads from directional protocol.") (license license:asl2.0))) +(define-public bustools + (package + (name "bustools") + (version "0.43.2") + (source (origin + (method git-fetch) + (uri (git-reference + (url "https://github.com/BUStools/bustools") + (commit (string-append "v" version)))) + (file-name (git-file-name name version)) + (sha256 + (base32 + "068kjlc4d528269nl5mc3j8h2c95r1v545d3fi1iw1ckg8rba0hg")))) + (build-system cmake-build-system) + (arguments (list #:tests? #f)) ;no test target + (inputs (list zlib)) + (home-page "https://bustools.github.io") + (synopsis "Tools for working with BUS files") + (description "bustools is a program for manipulating BUS files for single +cell RNA-Seq datasets. It can be used to error correct barcodes, collapse +UMIs, produce gene count or transcript compatibility count matrices, and is useful +for many other tasks.") + (license license:bsd-2))) + (define-public cellsnp-lite ;; Last release is from November 2021 and does not contain fixes. (let ((commit "0885d746b0b1ea65c8ef92f8943ca7669ca9734a") @@ -617,6 +642,50 @@ Compared to cellSNP, this package is more efficient with higher speed and less memory usage.") (license license:asl2.0)))) +(define-public cpat + (package + (name "cpat") + (version "3.0.4") + (source (origin + (method url-fetch) + (uri (pypi-uri "CPAT" version)) + (sha256 + (base32 + "0dfrwwbhv1n4nh2a903d1qfb30fgxgya89sa70aci3wzf8h2z0vd")) + (modules '((guix build utils))) + (snippet + '(for-each delete-file-recursively + (list ".eggs" + "lib/__pycache__/" + "lib/cpmodule/__pycache__/"))))) + (build-system pyproject-build-system) + (arguments + (list + #:phases + '(modify-phases %standard-phases + (replace 'check + (lambda* (#:key tests? #:allow-other-keys) + (when tests? + (with-directory-excursion "test" + ;; There is no test4.fa + (substitute* "test.sh" + ((".*-g test4.fa.*") "")) + (invoke "bash" "test.sh")))))))) + (propagated-inputs + (list python-numpy python-pysam)) + (inputs + (list r-minimal)) + (home-page "https://wlcb.oit.uci.edu/cpat/") + (synopsis "Alignment-free distinction between coding and noncoding RNA") + (description + "CPAT is a method to distinguish coding and noncoding RNA by using a +logistic regression model based on four pure sequence-based, linguistic +features: ORF size, ORF coverage, Ficket TESTCODE, and Hexamer usage bias. +Linguistic features based method does not require other genomes or protein +databases to perform alignment and is more robust. Because it is +alignment-free, it runs much faster and also easier to use.") + (license license:gpl2+))) + (define-public pbcopper (package (name "pbcopper") @@ -1070,6 +1139,42 @@ of single-cell data using Seurat, RcppML nmf, SingleCellExperiments and similar.") (license license:gpl2+)))) +(define-public r-stacas + (package + (name "r-stacas") + (version "2.2.0") + (source + (origin + (method git-fetch) + (uri (git-reference + (url "https://github.com/carmonalab/STACAS") + (commit version))) + (file-name (git-file-name name version)) + (sha256 + (base32 "13i0h5i6vlbrb8ndq9gr81560z9d74b2c7m3rjfzls01irjza9hm")))) + (properties `((upstream-name . "STACAS"))) + (build-system r-build-system) + (propagated-inputs + (list r-biocneighbors + r-biocparallel + r-ggplot2 + r-ggridges + r-pbapply + r-r-utils + r-seurat)) + (home-page "https://github.com/carmonalab/STACAS") + (synopsis "Sub-type anchoring correction for alignment in Seurat") + (description + "This package implements methods for batch correction and integration of +scRNA-seq datasets, based on the Seurat anchor-based integration framework. +In particular, STACAS is optimized for the integration of heterogenous +datasets with only limited overlap between cell sub-types (e.g. TIL sets of +CD8 from tumor with CD8/CD4 T cells from lymphnode), for which the default +Seurat alignment methods would tend to over-correct biological differences. +The 2.0 version of the package allows the users to incorporate explicit +information about cell-types in order to assist the integration process.") + (license license:gpl3))) + (define-public r-stringendo (let ((commit "15594b1bba11048a812874bafec0eea1dcc8618a") (revision "1")) @@ -1156,6 +1261,32 @@ shape. This package provides an @code{htmlwidget} for building streamgraph visualizations.") (license license:expat)))) +(define-public r-wasabi + (let ((commit "8c33cabde8d18c2657cd6e38e7cb834f87cf9846") + (revision "1")) + (package + (name "r-wasabi") + (version (git-version "1.0.1" revision commit)) + (source + (origin + (method git-fetch) + (uri (git-reference + (url "https://github.com/COMBINE-lab/wasabi") + (commit commit))) + (file-name (git-file-name name version)) + (sha256 + (base32 "0rpdj6n4cnx8n2zl60dzgl638474sg49dknwi9x3qb4g56dpphfa")))) + (properties `((upstream-name . "wasabi"))) + (build-system r-build-system) + (propagated-inputs (list r-data-table r-rhdf5 r-rjson)) + (home-page "https://github.com/COMBINE-lab/wasabi") + (synopsis "Use Sailfish and Salmon with Sleuth") + (description + "This package converts the output of the Sailfish and Salmon RNA-seq +quantification tools so that it can be used with the Sleuth differential +analysis package.") + (license license:bsd-3)))) + (define-public pbbam (package (name "pbbam") @@ -2259,6 +2390,40 @@ sequencing data and the end result are tables of UMI-unique DamID and CEL-Seq counts.") (license license:expat))) +(define-public python-snaptools + (package + (name "python-snaptools") + (version "1.4.8") + (source + (origin + (method url-fetch) + (uri (pypi-uri "snaptools" version)) + (sha256 + (base32 + "1s5373g5jjbshh3q39zy7dlxr7nda6ksxq9d1gw46h82c4fsmfbn")))) + (build-system pyproject-build-system) + (propagated-inputs + (list python-future + python-h5py + python-louvain + python-numpy + python-pybedtools + python-pysam)) + (home-page "https://github.com/r3fang/SnapTools") + (synopsis "Tools for processing snap files" ) + (description + "@code{SnapTools} can operate on snap files the following types of +operations: + +@itemize +@item index the reference genome before alignment; +@item align reads to the corresponding reference genome; +@item pre-process by convert pair-end reads into fragments, checking the + mapping quality score, alingment and filtration; +@item create the cell-by-bin matrix. +@end itemize") + (license license:asl2.0))) + (define-public python-bioframe (package (name "python-bioframe") @@ -4893,6 +5058,126 @@ be of arbitrary length. Repeats with pattern size in the range from 1 to 2000 bases are detected.") (license license:agpl3+))) +(define-public trinityrnaseq + (package + (name "trinityrnaseq") + (version "2.13.2") + (source (origin + (method git-fetch) + (uri (git-reference + (url "https://github.com/trinityrnaseq/trinityrnaseq.git") + (commit (string-append "Trinity-v" version)) + (recursive? #true))) + (file-name (git-file-name name version)) + (sha256 + (base32 + "1qszrxqbx4q5pavpgm4rkrh1z1v1mf7qx83vv3fnlqdmncnsf1gv")))) + (build-system gnu-build-system) + (arguments + (list + #:test-target "test" + #:modules + '((guix build gnu-build-system) + (guix build utils) + (ice-9 match) + (srfi srfi-1)) + #:make-flags + #~(list (string-append "CC=" #$(cc-for-target))) + #:phases + #~(modify-phases %standard-phases + (replace 'configure + (lambda _ + (setenv "SHELL" (which "sh")) + (setenv "CONFIG_SHELL" (which "sh")) + ;; Do not require version.h, which triggers a local build of a + ;; vendored htslib. + (substitute* "trinity-plugins/bamsifter/Makefile" + (("sift_bam_max_cov.cpp htslib/version.h") + "sift_bam_max_cov.cpp")))) + (add-after 'build 'build-plugins + (lambda _ + ;; Run this in the subdirectory to avoid running the + ;; tests right here. + (with-directory-excursion "trinity-plugins" + (invoke "make" "plugins")))) + ;; The install script uses rsync, provides no overrides for the + ;; default location at /usr/local/bin, and patching it would change + ;; all lines that do something. + (replace 'install + (lambda* (#:key inputs #:allow-other-keys) + (let ((share (string-append #$output "/share/trinity/")) + (bin (string-append #$output "/bin/"))) + (mkdir-p bin) + (copy-recursively "." share) + (delete-file (string-append share "/Chrysalis/build/CMakeFiles/CMakeOutput.log")) + (delete-file (string-append share "/Inchworm/build/CMakeFiles/CMakeOutput.log")) + + (wrap-program (string-append share "Trinity") + `("R_LIBS_SITE" ":" = (,(getenv "R_LIBS_SITE"))) + `("PERL5LIB" ":" = (,(getenv "PERL5LIB"))) + `("PYTHONPATH" ":" = (,(getenv "GUIX_PYTHONPATH"))) + `("PATH" ":" = + ,(cons (string-append share "/trinity-plugins/BIN") + (filter-map (match-lambda + ((name . dir) + (string-append dir "/bin"))) + inputs)))) + (symlink (string-append share "Trinity") + (string-append bin "Trinity")))))))) + (inputs + (list blast+ + bowtie + fastqc + hisat + htslib + icedtea-8 + jellyfish + kallisto + multiqc + perl + perl-uri-escape + python-numpy + python-wrapper + r-ape + r-argparse + r-biobase + r-ctc + r-deseq2 + r-edger + r-fastcluster + r-glimma + r-goplot + r-goseq + r-gplots + r-minimal + r-qvalue + r-rots + r-sm + r-tidyverse + rsem + salmon + samtools + sra-tools + star + zlib)) + (propagated-inputs + (list coreutils + gzip + which)) + (native-inputs (list cmake)) + (home-page "https://github.com/trinityrnaseq/trinityrnaseq/wiki") + (synopsis "Trinity RNA-Seq de novo transcriptome assembly") + (description "Trinity assembles transcript sequences from Illumina RNA-Seq +data. Trinity represents a novel method for the efficient and robust de novo +reconstruction of transcriptomes from RNA-seq data. Trinity combines three +independent software modules: Inchworm, Chrysalis, and Butterfly, applied +sequentially to process large volumes of RNA-seq reads. Trinity partitions +the sequence data into many individual de Bruijn graphs, each representing the +transcriptional complexity at a given gene or locus, and then processes each +graph independently to extract full-length splicing isoforms and to tease +apart transcripts derived from paralogous genes.") + (license license:bsd-3))) + (define-public repeat-masker (package (name "repeat-masker") @@ -10313,6 +10598,51 @@ data. This package includes panel editing or renaming for FCS files, bead-based normalization and debarcoding.") (license license:gpl3)))) +(define-public r-projectils + (let ((commit "cc73b97471b4b6eea11ce779b5c4a7dc5c3e1709") + (revision "1")) + (package + (name "r-projectils") + (version (git-version "3.0.0" revision commit)) + (source + (origin + (method git-fetch) + (uri (git-reference + (url "https://github.com/carmonalab/ProjecTILs") + (commit commit))) + (file-name (git-file-name name version)) + (sha256 + (base32 "0dpzvbhhb9andnj7angpj32cgkwd6rs6qgpl6i21pqzcn6vqqhqw")))) + (properties `((upstream-name . "ProjecTILs"))) + (build-system r-build-system) + (propagated-inputs + (list r-biocneighbors + r-biocparallel + r-dplyr + r-ggplot2 + r-matrix + r-patchwork + r-pheatmap + r-pracma + r-purrr + r-rcolorbrewer + r-reshape2 + r-scales + r-scgate + r-seurat + r-seuratobject + r-stacas + r-ucell + r-umap + r-uwot)) + (home-page "https://github.com/carmonalab/ProjecTILs") + (synopsis "Reference-based analysis of scRNA-seq data") + (description + "This package implements methods to project single-cell RNA-seq data +onto a reference atlas, enabling interpretation of unknown cell transcriptomic +states in the the context of known, reference states.") + (license license:gpl3)))) + (define-public r-presto (let ((commit "052085db9c88aa70a28d11cc58ebc807999bf0ad") (revision "0")) @@ -10527,6 +10857,43 @@ analysis of cell types, subtypes, transcriptional gradients,cell-cycle variation, gene modules and their regulatory models and more.") (license license:expat)))) +(define-public r-sleuth + (package + (name "r-sleuth") + (version "0.30.1") + (source + (origin + (method git-fetch) + (uri (git-reference + (url "https://github.com/pachterlab/sleuth") + (commit (string-append "v" version)))) + (file-name (git-file-name name version)) + (sha256 + (base32 "09xgc7r6iisjkk0c0wn0q56zy0aph386kphwixfzq4422y7vlqci")))) + (properties `((upstream-name . "sleuth"))) + (build-system r-build-system) + (propagated-inputs (list r-aggregation + r-data-table + r-dplyr + r-ggplot2 + r-lazyeval + r-matrixstats + r-pheatmap + r-reshape2 + r-rhdf5 + r-shiny + r-tidyr)) + (native-inputs (list r-knitr)) + (home-page "https://github.com/pachterlab/sleuth") + (synopsis "Tools for investigating RNA-Seq") + (description + "Sleuth is a program for differential analysis of RNA-Seq data. +It makes use of quantification uncertainty estimates obtained via Kallisto for +accurate differential analysis of isoforms or genes, allows testing in the +context of experiments with complex designs, and supports interactive +exploratory data analysis via sleuth live.") + (license license:gpl3))) + (define-public r-snapatac (package (name "r-snapatac") @@ -16602,7 +16969,16 @@ the HiCExplorer and pyGenomeTracks packages.") (file-name (git-file-name name version)) (sha256 (base32 - "1yavgxry38g326z10bclvdf8glmma05fxj5m73h15m1r2l9xmw3v")))) + "1yavgxry38g326z10bclvdf8glmma05fxj5m73h15m1r2l9xmw3v")) + (modules '((guix build utils))) + ;; setup.py is malformed. The requirements are defined using a catchall + ;; pattern for the patch version number. This has been fixed in version + ;; 3.7.3, but we cannot upgrade to this version yet, since some Guix + ;; packages are not new enough. (See upstream commit + ;; 4845c715ec7b105e938d0c2426e27d0181690bfe for the fix). + (snippet '(substitute* "setup.py" + (("\\.\\*") + ""))))) (build-system pyproject-build-system) (arguments (list @@ -20324,24 +20700,28 @@ aligner.") (uri (pypi-uri "scvelo" version)) (sha256 (base32 "0h5ha1459ljs0qgpnlfsw592i8dxqn6p9bl08l1ikpwk36baxb7z")))) - (build-system python-build-system) + (build-system pyproject-build-system) (arguments - `(#:phases - (modify-phases %standard-phases - ;; Numba needs a writable dir to cache functions. - (add-before 'check 'set-numba-cache-dir - (lambda _ - (setenv "NUMBA_CACHE_DIR" "/tmp"))) - (replace 'check - (lambda* (#:key outputs tests? #:allow-other-keys) - (when tests? - ;; The discovered test file names must match the names of the - ;; compiled files, so we cannot run the tests from - ;; /tmp/guix-build-*. - (with-directory-excursion - (string-append (assoc-ref outputs "out") - "/lib/python3.10/site-packages/scvelo/core/tests/") - (invoke "pytest" "-v")))))))) + (list + #:test-flags + ;; XXX: these two tests fail for unknown reasons + '(list "-k" "not test_perfect_fit and not test_perfect_fit_2d") + #:phases + #~(modify-phases %standard-phases + ;; Numba needs a writable dir to cache functions. + (add-before 'check 'set-numba-cache-dir + (lambda _ + (setenv "NUMBA_CACHE_DIR" "/tmp"))) + (replace 'check + (lambda* (#:key tests? test-flags #:allow-other-keys) + (when tests? + ;; The discovered test file names must match the names of the + ;; compiled files, so we cannot run the tests from + ;; /tmp/guix-build-*. + (with-directory-excursion + (string-append #$output + "/lib/python3.10/site-packages/scvelo/core/tests/") + (apply invoke "pytest" "-v" test-flags)))))))) (propagated-inputs (list python-anndata python-hnswlib |