summaryrefslogtreecommitdiff
path: root/gnu/packages/xml.scm
diff options
context:
space:
mode:
author Liliana Marie Prikler <liliana.prikler@gmail.com> 2025-07-20 18:52:34 +0200
committer Sharlatan Hellseher <sharlatanus@gmail.com> 2025-07-23 16:23:25 +0100
commit a823e30b744892d128c727cef9b86278b58c61af (patch)
tree 06bf1123b4f9735854a04525c9841e6f94b44da7 /gnu/packages/xml.scm
parent 206bfa0f03d99c9bbc90e817fe775629d5d0e7b8 (diff)
gnu: Add python-lxml-html-clean.
* gnu/packages/xml.scm (python-lxml-html-clean): New variable.
Change-Id: Ia67f7cd90a172f025c810123bc3838926ef693a7
Signed-off-by: Sharlatan Hellseher <sharlatanus@gmail.com>
Diffstat (limited to 'gnu/packages/xml.scm')
-rw-r--r-- gnu/packages/xml.scm 33
1 files changed, 33 insertions, 0 deletions
diff --git a/gnu/packages/xml.scm b/gnu/packages/xml.scm
index 799d878054..0d1992eb20 100644
--- a/gnu/packages/xml.scm
+++ b/gnu/packages/xml.scm
@@ -1945,6 +1945,39 @@ libxml2 and libxslt.")
;; NOTE(review): presumably keeps the old versioned name python-lxml-4.7
;; usable as a deprecated alias of python-lxml, and exports it -- confirm
;; against the definition of define-deprecated.
(define-deprecated python-lxml-4.7 python-lxml)
(export python-lxml-4.7)
+;; Package definition for lxml_html_clean 0.4.2.  The source is fetched via
+;; pypi-uri and built with pyproject-build-system; the 'check phase is
+;; replaced so that the test suite is driven by unittest and doctest directly.
+(define-public python-lxml-html-clean
+ (package
+ (name "python-lxml-html-clean")
+ (version "0.4.2")
+ (source
+ (origin
+ (method url-fetch)
+ ;; The PyPI project name uses underscores, unlike the package name above.
+ (uri (pypi-uri "lxml_html_clean" version))
+ (sha256
+ (base32 "1cxwrrv4kdkxwkwm12a6rh38xmb415257g31yjmk0m5rbmxiwaci"))))
+ (build-system pyproject-build-system)
+ (arguments
+ (list
+ #:phases
+ #~(modify-phases %standard-phases
+ ;; Replace the standard 'check phase.  Only when tests? is true:
+ ;; first run the unittest module tests.test_clean, then run doctest
+ ;; over the three text files under tests/.
+ (replace 'check
+ (lambda* (#:key tests? #:allow-other-keys)
+ (when tests?
+ (invoke "python" "-m" "unittest" "-v" "tests.test_clean")
+ (invoke "python" "-m" "doctest"
+ "tests/test_clean_embed.txt"
+ "tests/test_clean.txt"
+ "tests/test_autolink.txt")))))))
+ ;; NOTE(review): presumably propagated because the library uses lxml at
+ ;; run time -- confirm.
+ (propagated-inputs (list python-lxml))
+ ;; NOTE(review): presumably the build backend requirements -- confirm
+ ;; against the upstream pyproject.toml.
+ (native-inputs (list python-setuptools python-wheel))
+ (home-page "https://github.com/fedora-python/lxml_html_clean/")
+ (synopsis "Remove superfluous content from HTML files")
+ (description "This package provides a Cleaner for cleaning up HTML pages.
+It supports removing embedded or script content, special tags and CSS style
+annotations among other features. Its main purpose is removing superfluous
+content, it is not appropriate for security sensitive environments.")
+ (license license:bsd-3)))
+
(define-public python-untangle
;; The latest tagged release is from 2014; use the latest commit.
(let ((revision "1")