From 86afbf25d01349bd2fe0ee98f1d60890ef71b7b9 Mon Sep 17 00:00:00 2001 From: Claire Date: Wed, 30 Aug 2023 17:36:16 +0200 Subject: [PATCH] Change text extraction in `PlainTextFormatter` to be faster (#26727) --- app/lib/plain_text_formatter.rb | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/app/lib/plain_text_formatter.rb b/app/lib/plain_text_formatter.rb index 6fa2bc5d2..d1ff6808b 100644 --- a/app/lib/plain_text_formatter.rb +++ b/app/lib/plain_text_formatter.rb @@ -1,9 +1,7 @@ # frozen_string_literal: true class PlainTextFormatter - include ActionView::Helpers::TextHelper - - NEWLINE_TAGS_RE = /(
|
|<\/p>)+/.freeze + NEWLINE_TAGS_RE = %r{(
|
|

)+} attr_reader :text, :local @@ -18,7 +16,10 @@ class PlainTextFormatter if local? text else - html_entities.decode(strip_tags(insert_newlines)).chomp + node = Nokogiri::HTML.fragment(insert_newlines) + # Elements that are entirely removed with our Sanitize config + node.xpath('.//iframe|.//math|.//noembed|.//noframes|.//noscript|.//plaintext|.//script|.//style|.//svg|.//xmp').remove + node.text.chomp end end @@ -27,8 +28,4 @@ class PlainTextFormatter def insert_newlines text.gsub(NEWLINE_TAGS_RE) { |match| "#{match}\n" } end - - def html_entities - HTMLEntities.new - end end