From 9ae25947263cd4cc376fc210240e7b0f984a05d3 Mon Sep 17 00:00:00 2001 From: Matt Jankowski Date: Wed, 17 Apr 2024 05:00:08 -0400 Subject: [PATCH] Add reusable duplicate ID finder methods in maintenance CLI (#28910) --- lib/mastodon/cli/maintenance.rb | 61 ++++++++++++++++++++------------- 1 file changed, 38 insertions(+), 23 deletions(-) diff --git a/lib/mastodon/cli/maintenance.rb b/lib/mastodon/cli/maintenance.rb index d16d55eae..0b84047a1 100644 --- a/lib/mastodon/cli/maintenance.rb +++ b/lib/mastodon/cli/maintenance.rb @@ -254,7 +254,7 @@ module Mastodon::CLI say 'Deduplicating accounts… for local accounts, you will be asked to chose which account to keep unchanged.' - find_duplicate_accounts.each do |row| + duplicate_record_ids(:accounts, "lower(username), COALESCE(lower(domain), '')").each do |row| accounts = Account.where(id: row['ids'].split(',')) if accounts.first.local? @@ -306,7 +306,7 @@ module Mastodon::CLI end def deduplicate_users_process_email - database_connection.select_all("SELECT string_agg(id::text, ',') AS ids FROM users GROUP BY email HAVING count(*) > 1").each do |row| + duplicate_record_ids(:users, 'email').each do |row| users = User.where(id: row['ids'].split(',')).order(updated_at: :desc).includes(:account).to_a ref_user = users.shift say "Multiple users registered with e-mail address #{ref_user.email}.", :yellow @@ -320,7 +320,7 @@ module Mastodon::CLI end def deduplicate_users_process_confirmation_token - database_connection.select_all("SELECT string_agg(id::text, ',') AS ids FROM users WHERE confirmation_token IS NOT NULL GROUP BY confirmation_token HAVING count(*) > 1").each do |row| + duplicate_record_ids_without_nulls(:users, 'confirmation_token').each do |row| users = User.where(id: row['ids'].split(',')).order(created_at: :desc).includes(:account).to_a.drop(1) say "Unsetting confirmation token for those accounts: #{users.map { |user| user.account.acct }.join(', ')}", :yellow @@ -332,7 +332,7 @@ module Mastodon::CLI def deduplicate_users_process_remember_token if migrator_version < 2022_01_18_183010 - database_connection.select_all("SELECT string_agg(id::text, ',') AS ids FROM users WHERE remember_token IS NOT NULL GROUP BY remember_token HAVING count(*) > 1").each do |row| + duplicate_record_ids_without_nulls(:users, 'remember_token').each do |row| users = User.where(id: row['ids'].split(',')).order(updated_at: :desc).to_a.drop(1) say "Unsetting remember token for those accounts: #{users.map { |user| user.account.acct }.join(', ')}", :yellow @@ -344,7 +344,7 @@ module Mastodon::CLI end def deduplicate_users_process_password_token - database_connection.select_all("SELECT string_agg(id::text, ',') AS ids FROM users WHERE reset_password_token IS NOT NULL GROUP BY reset_password_token HAVING count(*) > 1").each do |row| + duplicate_record_ids_without_nulls(:users, 'reset_password_token').each do |row| users = User.where(id: row['ids'].split(',')).order(updated_at: :desc).includes(:account).to_a.drop(1) say "Unsetting password reset token for those accounts: #{users.map { |user| user.account.acct }.join(', ')}", :yellow @@ -358,7 +358,7 @@ module Mastodon::CLI remove_index_if_exists!(:account_domain_blocks, 'index_account_domain_blocks_on_account_id_and_domain') say 'Removing duplicate account domain blocks…' - database_connection.select_all("SELECT string_agg(id::text, ',') AS ids FROM account_domain_blocks GROUP BY account_id, domain HAVING count(*) > 1").each do |row| + duplicate_record_ids(:account_domain_blocks, 'account_id, domain').each do |row| AccountDomainBlock.where(id: row['ids'].split(',').drop(1)).delete_all end @@ -372,7 +372,7 @@ module Mastodon::CLI remove_index_if_exists!(:account_identity_proofs, 'index_account_proofs_on_account_and_provider_and_username') say 'Removing duplicate account identity proofs…' - database_connection.select_all("SELECT string_agg(id::text, ',') AS ids FROM account_identity_proofs GROUP BY account_id, provider, provider_username HAVING count(*) > 1").each do |row| + duplicate_record_ids(:account_identity_proofs, 'account_id, provider, provider_username').each do |row| AccountIdentityProof.where(id: row['ids'].split(',')).order(id: :desc).to_a.drop(1).each(&:destroy) end @@ -386,7 +386,7 @@ module Mastodon::CLI remove_index_if_exists!(:announcement_reactions, 'index_announcement_reactions_on_account_id_and_announcement_id') say 'Removing duplicate announcement reactions…' - database_connection.select_all("SELECT string_agg(id::text, ',') AS ids FROM announcement_reactions GROUP BY account_id, announcement_id, name HAVING count(*) > 1").each do |row| + duplicate_record_ids(:announcement_reactions, 'account_id, announcement_id, name').each do |row| AnnouncementReaction.where(id: row['ids'].split(',')).order(id: :desc).to_a.drop(1).each(&:destroy) end @@ -398,7 +398,7 @@ module Mastodon::CLI remove_index_if_exists!(:conversations, 'index_conversations_on_uri') say 'Deduplicating conversations…' - database_connection.select_all("SELECT string_agg(id::text, ',') AS ids FROM conversations WHERE uri IS NOT NULL GROUP BY uri HAVING count(*) > 1").each do |row| + duplicate_record_ids_without_nulls(:conversations, 'uri').each do |row| conversations = Conversation.where(id: row['ids'].split(',')).order(id: :desc).to_a ref_conversation = conversations.shift @@ -421,7 +421,7 @@ module Mastodon::CLI remove_index_if_exists!(:custom_emojis, 'index_custom_emojis_on_shortcode_and_domain') say 'Deduplicating custom_emojis…' - database_connection.select_all("SELECT string_agg(id::text, ',') AS ids FROM custom_emojis GROUP BY shortcode, domain HAVING count(*) > 1").each do |row| + duplicate_record_ids(:custom_emojis, 'shortcode, domain').each do |row| emojis = CustomEmoji.where(id: row['ids'].split(',')).order(id: :desc).to_a ref_emoji = emojis.shift @@ -440,7 +440,7 @@ module Mastodon::CLI remove_index_if_exists!(:custom_emoji_categories, 'index_custom_emoji_categories_on_name') say 'Deduplicating custom_emoji_categories…' - database_connection.select_all("SELECT string_agg(id::text, ',') AS ids FROM custom_emoji_categories GROUP BY name HAVING count(*) > 1").each do |row| + duplicate_record_ids(:custom_emoji_categories, 'name').each do |row| categories = CustomEmojiCategory.where(id: row['ids'].split(',')).order(id: :desc).to_a ref_category = categories.shift @@ -459,7 +459,7 @@ module Mastodon::CLI remove_index_if_exists!(:domain_allows, 'index_domain_allows_on_domain') say 'Deduplicating domain_allows…' - database_connection.select_all("SELECT string_agg(id::text, ',') AS ids FROM domain_allows GROUP BY domain HAVING count(*) > 1").each do |row| + duplicate_record_ids(:domain_allows, 'domain').each do |row| DomainAllow.where(id: row['ids'].split(',')).order(id: :desc).to_a.drop(1).each(&:destroy) end @@ -471,7 +471,7 @@ module Mastodon::CLI remove_index_if_exists!(:domain_blocks, 'index_domain_blocks_on_domain') say 'Deduplicating domain_blocks…' - database_connection.select_all("SELECT string_agg(id::text, ',') AS ids FROM domain_blocks GROUP BY domain HAVING count(*) > 1").each do |row| + duplicate_record_ids(:domain_blocks, 'domain').each do |row| domain_blocks = DomainBlock.where(id: row['ids'].split(',')).by_severity.reverse.to_a reject_media = domain_blocks.any?(&:reject_media?) @@ -497,7 +497,7 @@ module Mastodon::CLI remove_index_if_exists!(:unavailable_domains, 'index_unavailable_domains_on_domain') say 'Deduplicating unavailable_domains…' - database_connection.select_all("SELECT string_agg(id::text, ',') AS ids FROM unavailable_domains GROUP BY domain HAVING count(*) > 1").each do |row| + duplicate_record_ids(:unavailable_domains, 'domain').each do |row| UnavailableDomain.where(id: row['ids'].split(',')).order(id: :desc).to_a.drop(1).each(&:destroy) end @@ -509,7 +509,7 @@ module Mastodon::CLI remove_index_if_exists!(:email_domain_blocks, 'index_email_domain_blocks_on_domain') say 'Deduplicating email_domain_blocks…' - database_connection.select_all("SELECT string_agg(id::text, ',') AS ids FROM email_domain_blocks GROUP BY domain HAVING count(*) > 1").each do |row| + duplicate_record_ids(:email_domain_blocks, 'domain').each do |row| domain_blocks = EmailDomainBlock.where(id: row['ids'].split(',')).order(EmailDomainBlock.arel_table[:parent_id].asc.nulls_first).to_a domain_blocks.drop(1).each(&:destroy) end @@ -522,7 +522,7 @@ module Mastodon::CLI remove_index_if_exists!(:media_attachments, 'index_media_attachments_on_shortcode') say 'Deduplicating media_attachments…' - database_connection.select_all("SELECT string_agg(id::text, ',') AS ids FROM media_attachments WHERE shortcode IS NOT NULL GROUP BY shortcode HAVING count(*) > 1").each do |row| + duplicate_record_ids_without_nulls(:media_attachments, 'shortcode').each do |row| MediaAttachment.where(id: row['ids'].split(',').drop(1)).update_all(shortcode: nil) end @@ -538,7 +538,7 @@ module Mastodon::CLI remove_index_if_exists!(:preview_cards, 'index_preview_cards_on_url') say 'Deduplicating preview_cards…' - database_connection.select_all("SELECT string_agg(id::text, ',') AS ids FROM preview_cards GROUP BY url HAVING count(*) > 1").each do |row| + duplicate_record_ids(:preview_cards, 'url').each do |row| PreviewCard.where(id: row['ids'].split(',')).order(id: :desc).to_a.drop(1).each(&:destroy) end @@ -550,7 +550,7 @@ module Mastodon::CLI remove_index_if_exists!(:statuses, 'index_statuses_on_uri') say 'Deduplicating statuses…' - database_connection.select_all("SELECT string_agg(id::text, ',') AS ids FROM statuses WHERE uri IS NOT NULL GROUP BY uri HAVING count(*) > 1").each do |row| + duplicate_record_ids_without_nulls(:statuses, 'uri').each do |row| statuses = Status.where(id: row['ids'].split(',')).order(id: :asc).to_a ref_status = statuses.shift statuses.each do |status| @@ -572,7 +572,7 @@ module Mastodon::CLI remove_index_if_exists!(:tags, 'index_tags_on_name_lower_btree') say 'Deduplicating tags…' - database_connection.select_all("SELECT string_agg(id::text, ',') AS ids FROM tags GROUP BY lower((name)::text) HAVING count(*) > 1").each do |row| + duplicate_record_ids(:tags, 'lower((name)::text)').each do |row| tags = Tag.where(id: row['ids'].split(',')).order(Arel.sql('(usable::int + trendable::int + listable::int) desc')).to_a ref_tag = tags.shift tags.each do |tag| @@ -595,7 +595,7 @@ module Mastodon::CLI remove_index_if_exists!(:webauthn_credentials, 'index_webauthn_credentials_on_external_id') say 'Deduplicating webauthn_credentials…' - database_connection.select_all("SELECT string_agg(id::text, ',') AS ids FROM webauthn_credentials GROUP BY external_id HAVING count(*) > 1").each do |row| + duplicate_record_ids(:webauthn_credentials, 'external_id').each do |row| WebauthnCredential.where(id: row['ids'].split(',')).order(id: :desc).to_a.drop(1).each(&:destroy) end @@ -609,7 +609,7 @@ module Mastodon::CLI remove_index_if_exists!(:webhooks, 'index_webhooks_on_url') say 'Deduplicating webhooks…' - database_connection.select_all("SELECT string_agg(id::text, ',') AS ids FROM webhooks GROUP BY url HAVING count(*) > 1").each do |row| + duplicate_record_ids(:webhooks, 'url').each do |row| Webhook.where(id: row['ids'].split(',')).order(id: :desc).drop(1).each(&:destroy) end @@ -746,8 +746,23 @@ module Mastodon::CLI ActiveRecord::Migrator.current_version end - def find_duplicate_accounts - database_connection.select_all("SELECT string_agg(id::text, ',') AS ids FROM accounts GROUP BY lower(username), COALESCE(lower(domain), '') HAVING count(*) > 1") + def duplicate_record_ids_without_nulls(table, group_by) + database_connection.select_all(<<~SQL.squish) + SELECT string_agg(id::text, ',') AS ids + FROM #{table} + WHERE #{group_by} IS NOT NULL + GROUP BY #{group_by} + HAVING COUNT(*) > 1 + SQL + end + + def duplicate_record_ids(table, group_by) + database_connection.select_all(<<~SQL.squish) + SELECT string_agg(id::text, ',') AS ids + FROM #{table} + GROUP BY #{group_by} + HAVING COUNT(*) > 1 + SQL end def remove_index_if_exists!(table, name)