diff options
author | Liliana Marie Prikler <liliana.prikler@gmail.com> | 2025-07-20 18:52:34 +0200 |
---|---|---|
committer | Sharlatan Hellseher <sharlatanus@gmail.com> | 2025-07-23 16:23:25 +0100 |
commit | a823e30b744892d128c727cef9b86278b58c61af (patch) | |
tree | 06bf1123b4f9735854a04525c9841e6f94b44da7 /gnu/packages/xml.scm | |
parent | 206bfa0f03d99c9bbc90e817fe775629d5d0e7b8 (diff) |
gnu: Add python-lxml-html-clean.
* gnu/packages/xml.scm (python-lxml-html-clean): New variable.
Change-Id: Ia67f7cd90a172f025c810123bc3838926ef693a7
Signed-off-by: Sharlatan Hellseher <sharlatanus@gmail.com>
Diffstat (limited to 'gnu/packages/xml.scm')
-rw-r--r-- | gnu/packages/xml.scm | 33 |
1 file changed, 33 insertions, 0 deletions
```diff
diff --git a/gnu/packages/xml.scm b/gnu/packages/xml.scm
index 799d878054..0d1992eb20 100644
--- a/gnu/packages/xml.scm
+++ b/gnu/packages/xml.scm
@@ -1945,6 +1945,39 @@ libxml2 and libxslt.")
 (define-deprecated python-lxml-4.7 python-lxml)
 (export python-lxml-4.7)
 
+(define-public python-lxml-html-clean
+  (package
+    (name "python-lxml-html-clean")
+    (version "0.4.2")
+    (source
+     (origin
+       (method url-fetch)
+       (uri (pypi-uri "lxml_html_clean" version))
+       (sha256
+        (base32 "1cxwrrv4kdkxwkwm12a6rh38xmb415257g31yjmk0m5rbmxiwaci"))))
+    (build-system pyproject-build-system)
+    (arguments
+     (list
+      #:phases
+      #~(modify-phases %standard-phases
+          (replace 'check
+            (lambda* (#:key tests? #:allow-other-keys)
+              (when tests?
+                (invoke "python" "-m" "unittest" "-v" "tests.test_clean")
+                (invoke "python" "-m" "doctest"
+                        "tests/test_clean_embed.txt"
+                        "tests/test_clean.txt"
+                        "tests/test_autolink.txt")))))))
+    (propagated-inputs (list python-lxml))
+    (native-inputs (list python-setuptools python-wheel))
+    (home-page "https://github.com/fedora-python/lxml_html_clean/")
+    (synopsis "Remove superfluous content from HTML files")
+    (description "This package provides a Cleaner for cleaning up HTML pages.
+It supports removing embedded or script content, special tags and CSS style
+annotations among other features. Its main purpose is removing superfluous
+content, it is not appropriate for security sensitive environments.")
+    (license license:bsd-3)))
+
 (define-public python-untangle
   ;; The latest tagged release is from 2014; use the latest commit.
   (let ((revision "1")
```