Change text extraction in PlainTextFormatter to be faster (#26727)

This commit is contained in:
Claire 2023-08-30 17:36:16 +02:00 committed by GitHub
parent 21ec596dab
commit 5c38c3a9a1
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -1,8 +1,6 @@
# frozen_string_literal: true # frozen_string_literal: true
class PlainTextFormatter class PlainTextFormatter
include ActionView::Helpers::TextHelper
NEWLINE_TAGS_RE = %r{(<br />|<br>|</p>)+} NEWLINE_TAGS_RE = %r{(<br />|<br>|</p>)+}
attr_reader :text, :local attr_reader :text, :local
@ -18,7 +16,10 @@ class PlainTextFormatter
if local? if local?
text text
else else
html_entities.decode(strip_tags(insert_newlines)).chomp node = Nokogiri::HTML.fragment(insert_newlines)
# Elements that are entirely removed with our Sanitize config
node.xpath('.//iframe|.//math|.//noembed|.//noframes|.//noscript|.//plaintext|.//script|.//style|.//svg|.//xmp').remove
node.text.chomp
end end
end end
@ -27,8 +28,4 @@ class PlainTextFormatter
def insert_newlines def insert_newlines
text.gsub(NEWLINE_TAGS_RE) { |match| "#{match}\n" } text.gsub(NEWLINE_TAGS_RE) { |match| "#{match}\n" }
end end
def html_entities
HTMLEntities.new
end
end end