summaryrefslogtreecommitdiff
path: root/gnu/packages/bioinformatics.scm
diff options
context:
space:
mode:
authorLiliana Marie Prikler <liliana.prikler@gmail.com>2024-01-07 18:31:32 +0100
committerLiliana Marie Prikler <liliana.prikler@gmail.com>2024-01-07 18:31:32 +0100
commit2cbb1a6e1e2679c7db74b6b906d223fb3661d09f (patch)
treeab4101e769bbac1d53950004044fd297223b9492 /gnu/packages/bioinformatics.scm
parentb369dc272eeba4eafb2bc7dc56f7b3cf09fb0961 (diff)
parentb212e6934643e085f168a5364cb593f61aa616ba (diff)
Merge branch 'master' into gnome-team
Diffstat (limited to 'gnu/packages/bioinformatics.scm')
-rw-r--r--gnu/packages/bioinformatics.scm416
1 files changed, 398 insertions, 18 deletions
diff --git a/gnu/packages/bioinformatics.scm b/gnu/packages/bioinformatics.scm
index 2e4bc876d4..7dbebcf3da 100644
--- a/gnu/packages/bioinformatics.scm
+++ b/gnu/packages/bioinformatics.scm
@@ -130,6 +130,7 @@
#:use-module (gnu packages pdf)
#:use-module (gnu packages perl)
#:use-module (gnu packages perl-check)
+ #:use-module (gnu packages perl-web)
#:use-module (gnu packages pkg-config)
#:use-module (gnu packages popt)
#:use-module (gnu packages protobuf)
@@ -574,6 +575,30 @@ BED, GFF/GTF, VCF.")
whole-genome bisulfite sequencing (WGBS) reads from directional protocol.")
(license license:asl2.0)))
+(define-public bustools ;command-line tools for BUS files, built with CMake
+ (package
+ (name "bustools")
+ (version "0.43.2")
+ (source (origin
+ (method git-fetch)
+ (uri (git-reference
+ (url "https://github.com/BUStools/bustools")
+ (commit (string-append "v" version)))) ;i.e. "v0.43.2"
+ (file-name (git-file-name name version))
+ (sha256
+ (base32
+ "068kjlc4d528269nl5mc3j8h2c95r1v545d3fi1iw1ckg8rba0hg"))))
+ (build-system cmake-build-system)
+ (arguments (list #:tests? #f)) ;no test target
+ (inputs (list zlib)) ;the only declared input
+ (home-page "https://bustools.github.io")
+ (synopsis "Tools for working with BUS files")
+ (description "bustools is a program for manipulating BUS files for single
+cell RNA-Seq datasets. It can be used to error correct barcodes, collapse
+UMIs, produce gene count or transcript compatibility count matrices, and is useful
+for many other tasks.")
+ (license license:bsd-2)))
+
(define-public cellsnp-lite
;; Last release is from November 2021 and does not contain fixes.
(let ((commit "0885d746b0b1ea65c8ef92f8943ca7669ca9734a")
@@ -617,6 +642,50 @@ Compared to cellSNP, this package is more efficient with higher speed and less
memory usage.")
(license license:asl2.0))))
+(define-public cpat ;Python package distinguishing coding from noncoding RNA
+ (package
+ (name "cpat")
+ (version "3.0.4")
+ (source (origin
+ (method url-fetch)
+ (uri (pypi-uri "CPAT" version)) ;the PyPI name is upper-case
+ (sha256
+ (base32
+ "0dfrwwbhv1n4nh2a903d1qfb30fgxgya89sa70aci3wzf8h2z0vd"))
+ (modules '((guix build utils))) ;for delete-file-recursively
+ (snippet
+ '(for-each delete-file-recursively ;drop these directories from the source
+ (list ".eggs"
+ "lib/__pycache__/"
+ "lib/cpmodule/__pycache__/")))))
+ (build-system pyproject-build-system)
+ (arguments
+ (list
+ #:phases
+ '(modify-phases %standard-phases
+ (replace 'check ;run upstream's shell-based test script instead
+ (lambda* (#:key tests? #:allow-other-keys)
+ (when tests?
+ (with-directory-excursion "test"
+ ;; There is no test4.fa
+ (substitute* "test.sh"
+ ((".*-g test4.fa.*") "")) ;blank out every line that uses it
+ (invoke "bash" "test.sh"))))))))
+ (propagated-inputs
+ (list python-numpy python-pysam))
+ (inputs
+ (list r-minimal))
+ (home-page "https://wlcb.oit.uci.edu/cpat/")
+ (synopsis "Alignment-free distinction between coding and noncoding RNA")
+ (description
+ "CPAT is a method to distinguish coding and noncoding RNA by using a
+logistic regression model based on four pure sequence-based, linguistic
+features: ORF size, ORF coverage, Fickett TESTCODE, and hexamer usage bias.
+A method based on linguistic features does not require other genomes or
+protein databases to perform alignment and is more robust. Because it is
+alignment-free, it runs much faster and is also easier to use.")
+ (license license:gpl2+)))
+
(define-public pbcopper
(package
(name "pbcopper")
@@ -1070,6 +1139,42 @@ of single-cell data using Seurat, RcppML nmf, SingleCellExperiments and
similar.")
(license license:gpl2+))))
+(define-public r-stacas ;R package for integrating scRNA-seq datasets
+ (package
+ (name "r-stacas")
+ (version "2.2.0")
+ (source
+ (origin
+ (method git-fetch)
+ (uri (git-reference
+ (url "https://github.com/carmonalab/STACAS")
+ (commit version))) ;the bare version string, no prefix
+ (file-name (git-file-name name version))
+ (sha256
+ (base32 "13i0h5i6vlbrb8ndq9gr81560z9d74b2c7m3rjfzls01irjza9hm"))))
+ (properties `((upstream-name . "STACAS"))) ;upstream spells the name in upper case
+ (build-system r-build-system)
+ (propagated-inputs
+ (list r-biocneighbors
+ r-biocparallel
+ r-ggplot2
+ r-ggridges
+ r-pbapply
+ r-r-utils
+ r-seurat))
+ (home-page "https://github.com/carmonalab/STACAS")
+ (synopsis "Sub-type anchoring correction for alignment in Seurat")
+ (description
+ "This package implements methods for batch correction and integration of
+scRNA-seq datasets, based on the Seurat anchor-based integration framework.
+In particular, STACAS is optimized for the integration of heterogeneous
+datasets with only limited overlap between cell sub-types (e.g. TIL sets of
+CD8 from tumor with CD8/CD4 T cells from lymph node), for which the default
+Seurat alignment methods would tend to over-correct biological differences.
+The 2.0 version of the package allows the users to incorporate explicit
+information about cell-types in order to assist the integration process.")
+ (license license:gpl3)))
+
(define-public r-stringendo
(let ((commit "15594b1bba11048a812874bafec0eea1dcc8618a")
(revision "1"))
@@ -1156,6 +1261,32 @@ shape. This package provides an @code{htmlwidget} for building streamgraph
visualizations.")
(license license:expat))))
+(define-public r-wasabi ;R package fetched at a pinned upstream Git commit
+ (let ((commit "8c33cabde8d18c2657cd6e38e7cb834f87cf9846")
+ (revision "1"))
+ (package
+ (name "r-wasabi")
+ (version (git-version "1.0.1" revision commit)) ;built from "1.0.1", revision and commit
+ (source
+ (origin
+ (method git-fetch)
+ (uri (git-reference
+ (url "https://github.com/COMBINE-lab/wasabi")
+ (commit commit))) ;exactly the commit bound above
+ (file-name (git-file-name name version))
+ (sha256
+ (base32 "0rpdj6n4cnx8n2zl60dzgl638474sg49dknwi9x3qb4g56dpphfa"))))
+ (properties `((upstream-name . "wasabi")))
+ (build-system r-build-system)
+ (propagated-inputs (list r-data-table r-rhdf5 r-rjson))
+ (home-page "https://github.com/COMBINE-lab/wasabi")
+ (synopsis "Use Sailfish and Salmon with Sleuth")
+ (description
+ "This package converts the output of the Sailfish and Salmon RNA-seq
+quantification tools so that it can be used with the Sleuth differential
+analysis package.")
+ (license license:bsd-3))))
+
(define-public pbbam
(package
(name "pbbam")
@@ -2259,6 +2390,40 @@ sequencing data and the end result are tables of UMI-unique DamID and CEL-Seq
counts.")
(license license:expat)))
+(define-public python-snaptools ;Python tools for snap files, from PyPI
+ (package
+ (name "python-snaptools")
+ (version "1.4.8")
+ (source
+ (origin
+ (method url-fetch)
+ (uri (pypi-uri "snaptools" version))
+ (sha256
+ (base32
+ "1s5373g5jjbshh3q39zy7dlxr7nda6ksxq9d1gw46h82c4fsmfbn"))))
+ (build-system pyproject-build-system)
+ (propagated-inputs
+ (list python-future
+ python-h5py
+ python-louvain
+ python-numpy
+ python-pybedtools
+ python-pysam))
+ (home-page "https://github.com/r3fang/SnapTools")
+ (synopsis "Tools for processing snap files")
+ (description
+ "@code{SnapTools} can perform the following types of operations on snap
+files:
+
+@itemize
+@item index the reference genome before alignment;
+@item align reads to the corresponding reference genome;
+@item pre-process by converting paired-end reads into fragments, checking
+ the mapping quality score, alignment and filtration;
+@item create the cell-by-bin matrix.
+@end itemize")
+ (license license:asl2.0)))
+
(define-public python-bioframe
(package
(name "python-bioframe")
@@ -4893,6 +5058,126 @@ be of arbitrary length. Repeats with pattern size in the range from 1 to 2000
bases are detected.")
(license license:agpl3+)))
+(define-public trinityrnaseq ;Trinity RNA-Seq assembler together with its plugins
+ (package
+ (name "trinityrnaseq")
+ (version "2.13.2")
+ (source (origin
+ (method git-fetch)
+ (uri (git-reference
+ (url "https://github.com/trinityrnaseq/trinityrnaseq.git")
+ (commit (string-append "Trinity-v" version)) ;i.e. "Trinity-v2.13.2"
+ (recursive? #true)))
+ (file-name (git-file-name name version))
+ (sha256
+ (base32
+ "1qszrxqbx4q5pavpgm4rkrh1z1v1mf7qx83vv3fnlqdmncnsf1gv"))))
+ (build-system gnu-build-system)
+ (arguments
+ (list
+ #:test-target "test"
+ #:modules
+ '((guix build gnu-build-system)
+ (guix build utils)
+ (ice-9 match) ;for match-lambda
+ (srfi srfi-1)) ;for filter-map
+ #:make-flags
+ #~(list (string-append "CC=" #$(cc-for-target)))
+ #:phases
+ #~(modify-phases %standard-phases
+ (replace 'configure
+ (lambda _
+ (setenv "SHELL" (which "sh"))
+ (setenv "CONFIG_SHELL" (which "sh"))
+ ;; Do not require version.h, which triggers a local build of a
+ ;; vendored htslib.
+ (substitute* "trinity-plugins/bamsifter/Makefile"
+ (("sift_bam_max_cov.cpp htslib/version.h")
+ "sift_bam_max_cov.cpp"))))
+ (add-after 'build 'build-plugins
+ (lambda _
+ ;; Run this in the subdirectory to avoid running the
+ ;; tests right here.
+ (with-directory-excursion "trinity-plugins"
+ (invoke "make" "plugins"))))
+ ;; The install script uses rsync, provides no overrides for the
+ ;; default location at /usr/local/bin, and patching it would change
+ ;; all lines that do something.
+ (replace 'install
+ (lambda* (#:key inputs #:allow-other-keys)
+ (let ((share (string-append #$output "/share/trinity/")) ;ends in "/": append relative names
+ (bin (string-append #$output "/bin/")))
+ (mkdir-p bin)
+ (copy-recursively "." share) ;install the whole build tree
+ (delete-file (string-append share "Chrysalis/build/CMakeFiles/CMakeOutput.log"))
+ (delete-file (string-append share "Inchworm/build/CMakeFiles/CMakeOutput.log"))
+
+ (wrap-program (string-append share "Trinity")
+ `("R_LIBS_SITE" ":" = (,(getenv "R_LIBS_SITE")))
+ `("PERL5LIB" ":" = (,(getenv "PERL5LIB")))
+ `("PYTHONPATH" ":" = (,(getenv "GUIX_PYTHONPATH")))
+ `("PATH" ":" =
+ ,(cons (string-append share "trinity-plugins/BIN")
+ (filter-map (match-lambda ;the "bin" directory of every input
+ ((_ . dir)
+ (string-append dir "/bin")))
+ inputs))))
+ (symlink (string-append share "Trinity") ;expose the wrapped script in bin/
+ (string-append bin "Trinity"))))))))
+ (inputs
+ (list blast+
+ bowtie
+ fastqc
+ hisat
+ htslib
+ icedtea-8
+ jellyfish
+ kallisto
+ multiqc
+ perl
+ perl-uri-escape
+ python-numpy
+ python-wrapper
+ r-ape
+ r-argparse
+ r-biobase
+ r-ctc
+ r-deseq2
+ r-edger
+ r-fastcluster
+ r-glimma
+ r-goplot
+ r-goseq
+ r-gplots
+ r-minimal
+ r-qvalue
+ r-rots
+ r-sm
+ r-tidyverse
+ rsem
+ salmon
+ samtools
+ sra-tools
+ star
+ zlib))
+ (propagated-inputs
+ (list coreutils
+ gzip
+ which))
+ (native-inputs (list cmake))
+ (home-page "https://github.com/trinityrnaseq/trinityrnaseq/wiki")
+ (synopsis "Trinity RNA-Seq de novo transcriptome assembly")
+ (description "Trinity assembles transcript sequences from Illumina RNA-Seq
+data. Trinity represents a novel method for the efficient and robust de novo
+reconstruction of transcriptomes from RNA-seq data. Trinity combines three
+independent software modules: Inchworm, Chrysalis, and Butterfly, applied
+sequentially to process large volumes of RNA-seq reads. Trinity partitions
+the sequence data into many individual de Bruijn graphs, each representing the
+transcriptional complexity at a given gene or locus, and then processes each
+graph independently to extract full-length splicing isoforms and to tease
+apart transcripts derived from paralogous genes.")
+ (license license:bsd-3)))
+
(define-public repeat-masker
(package
(name "repeat-masker")
@@ -10313,6 +10598,51 @@ data. This package includes panel editing or renaming for FCS files,
bead-based normalization and debarcoding.")
(license license:gpl3))))
+(define-public r-projectils ;R package fetched at a pinned upstream Git commit
+ (let ((commit "cc73b97471b4b6eea11ce779b5c4a7dc5c3e1709")
+ (revision "1"))
+ (package
+ (name "r-projectils")
+ (version (git-version "3.0.0" revision commit)) ;built from "3.0.0", revision and commit
+ (source
+ (origin
+ (method git-fetch)
+ (uri (git-reference
+ (url "https://github.com/carmonalab/ProjecTILs")
+ (commit commit))) ;exactly the commit bound above
+ (file-name (git-file-name name version))
+ (sha256
+ (base32 "0dpzvbhhb9andnj7angpj32cgkwd6rs6qgpl6i21pqzcn6vqqhqw"))))
+ (properties `((upstream-name . "ProjecTILs"))) ;upstream uses mixed case
+ (build-system r-build-system)
+ (propagated-inputs
+ (list r-biocneighbors
+ r-biocparallel
+ r-dplyr
+ r-ggplot2
+ r-matrix
+ r-patchwork
+ r-pheatmap
+ r-pracma
+ r-purrr
+ r-rcolorbrewer
+ r-reshape2
+ r-scales
+ r-scgate
+ r-seurat
+ r-seuratobject
+ r-stacas
+ r-ucell
+ r-umap
+ r-uwot))
+ (home-page "https://github.com/carmonalab/ProjecTILs")
+ (synopsis "Reference-based analysis of scRNA-seq data")
+ (description
+ "This package implements methods to project single-cell RNA-seq data
+onto a reference atlas, enabling interpretation of unknown cell transcriptomic
+states in the context of known, reference states.")
+ (license license:gpl3))))
+
(define-public r-presto
(let ((commit "052085db9c88aa70a28d11cc58ebc807999bf0ad")
(revision "0"))
@@ -10527,6 +10857,43 @@ analysis of cell types, subtypes, transcriptional gradients,cell-cycle
variation, gene modules and their regulatory models and more.")
(license license:expat))))
+(define-public r-sleuth ;R package for differential analysis of RNA-Seq data
+ (package
+ (name "r-sleuth")
+ (version "0.30.1")
+ (source
+ (origin
+ (method git-fetch)
+ (uri (git-reference
+ (url "https://github.com/pachterlab/sleuth")
+ (commit (string-append "v" version)))) ;i.e. "v0.30.1"
+ (file-name (git-file-name name version))
+ (sha256
+ (base32 "09xgc7r6iisjkk0c0wn0q56zy0aph386kphwixfzq4422y7vlqci"))))
+ (properties `((upstream-name . "sleuth")))
+ (build-system r-build-system)
+ (propagated-inputs (list r-aggregation
+ r-data-table
+ r-dplyr
+ r-ggplot2
+ r-lazyeval
+ r-matrixstats
+ r-pheatmap
+ r-reshape2
+ r-rhdf5
+ r-shiny
+ r-tidyr))
+ (native-inputs (list r-knitr))
+ (home-page "https://github.com/pachterlab/sleuth")
+ (synopsis "Tools for investigating RNA-Seq")
+ (description
+ "Sleuth is a program for differential analysis of RNA-Seq data.
+It makes use of quantification uncertainty estimates obtained via Kallisto for
+accurate differential analysis of isoforms or genes, allows testing in the
+context of experiments with complex designs, and supports interactive
+exploratory data analysis via sleuth live.")
+ (license license:gpl3)))
+
(define-public r-snapatac
(package
(name "r-snapatac")
@@ -16602,7 +16969,16 @@ the HiCExplorer and pyGenomeTracks packages.")
(file-name (git-file-name name version))
(sha256
(base32
- "1yavgxry38g326z10bclvdf8glmma05fxj5m73h15m1r2l9xmw3v"))))
+ "1yavgxry38g326z10bclvdf8glmma05fxj5m73h15m1r2l9xmw3v"))
+ (modules '((guix build utils)))
+ ;; setup.py is malformed. The requirements are defined using a catchall
+ ;; pattern for the patch version number. This has been fixed in version
+ ;; 3.7.3, but we cannot upgrade to this version yet, since some Guix
+ ;; packages are not new enough. (See upstream commit
+ ;; 4845c715ec7b105e938d0c2426e27d0181690bfe for the fix).
+ (snippet '(substitute* "setup.py"
+ (("\\.\\*")
+ "")))))
(build-system pyproject-build-system)
(arguments
(list
@@ -20324,24 +20700,28 @@ aligner.")
(uri (pypi-uri "scvelo" version))
(sha256
(base32 "0h5ha1459ljs0qgpnlfsw592i8dxqn6p9bl08l1ikpwk36baxb7z"))))
- (build-system python-build-system)
+ (build-system pyproject-build-system)
(arguments
- `(#:phases
- (modify-phases %standard-phases
- ;; Numba needs a writable dir to cache functions.
- (add-before 'check 'set-numba-cache-dir
- (lambda _
- (setenv "NUMBA_CACHE_DIR" "/tmp")))
- (replace 'check
- (lambda* (#:key outputs tests? #:allow-other-keys)
- (when tests?
- ;; The discovered test file names must match the names of the
- ;; compiled files, so we cannot run the tests from
- ;; /tmp/guix-build-*.
- (with-directory-excursion
- (string-append (assoc-ref outputs "out")
- "/lib/python3.10/site-packages/scvelo/core/tests/")
- (invoke "pytest" "-v"))))))))
+ (list
+ #:test-flags
+ ;; XXX: these two tests fail for unknown reasons
+ '(list "-k" "not test_perfect_fit and not test_perfect_fit_2d")
+ #:phases
+ #~(modify-phases %standard-phases
+ ;; Numba needs a writable dir to cache functions.
+ (add-before 'check 'set-numba-cache-dir
+ (lambda _
+ (setenv "NUMBA_CACHE_DIR" "/tmp")))
+ (replace 'check
+ (lambda* (#:key tests? test-flags #:allow-other-keys)
+ (when tests?
+ ;; The discovered test file names must match the names of the
+ ;; compiled files, so we cannot run the tests from
+ ;; /tmp/guix-build-*.
+ (with-directory-excursion
+ (string-append #$output
+ "/lib/python3.10/site-packages/scvelo/core/tests/")
+ (apply invoke "pytest" "-v" test-flags))))))))
(propagated-inputs
(list python-anndata
python-hnswlib