笔记:筛选出来对用户而言没什么用的帖子
中继站经常会传入大量的,其实用户根本就不会去看一眼的帖子
如果我们认为这样的帖子是对用户而言没什么用的:
- 不是被本站用户,或者本站用户关注或者被关注的人发的
- 且不是被上述用户直接或者间接回复(在同一个 thread),也没有被转发或者转发
- 没有被本站用户或者远程用户收藏或点赞或者投票(等任何方式“交互”)
- 已经在数据库中呆了15天没有改过了
那么极大概率这些帖子完全不会被用到,理论上说可以从数据库中安全删掉。
写了个脚本找到这些帖子。( should_keep_notes_ids_uniq
则是相反,必须要保留的帖子)
WITH
should_keep_users AS (
SELECT id FROM "user" WHERE host is NULL
UNION
SELECT "followeeId" as id FROM "following"
UNION
SELECT "followerId" as id FROM "following"
),
should_keep_notes_data AS (
SELECT n.id as nid, n."threadId" as ntid, n."renoteId" as nrid
FROM should_keep_users su
JOIN "note" n
ON n."userId" = su.id
),
should_keep_notes_ids_not_uniq AS (
SELECT nid as id FROM should_keep_notes_data
UNION ALL
SELECT n.id as id
FROM "note" n
WHERE n."threadId" IN ( SELECT ntid as id FROM should_keep_notes_data WHERE ntid IS NOT NULL )
UNION ALL
SELECT n.id as id
FROM "note" n
WHERE n."id" IN ( SELECT nrid as id FROM should_keep_notes_data WHERE nrid IS NOT NULL )
UNION ALL
SELECT n.id as id
FROM "note" n
WHERE n."renoteId" IN ( SELECT nid as id FROM should_keep_notes_data )
UNION ALL
SELECT n.id as id
FROM "note" n
WHERE n."renoteId" IN ( SELECT nrid as id FROM should_keep_notes_data WHERE nrid IS NOT NULL )
UNION ALL
(select "noteId" as id from note_reaction) UNION ALL
(select "noteId" as id from user_note_pining) UNION ALL
(select "noteId" as id from note_unread) UNION ALL
(select "noteId" as id from note_watching) UNION ALL
(select "noteId" as id from note_favorite) UNION ALL
(select "noteId" as id from poll_vote) UNION ALL
(select "noteId" as id from poll)
),
should_keep_notes_ids_uniq AS (
SELECT DISTINCT id from should_keep_notes_ids_not_uniq
),
should_delete_notes_ids AS (
(
SELECT id
FROM "note" n
WHERE n."userHost" IS NOT NULL
AND "updatedAt" < (current_timestamp - interval '15 day')
)
EXCEPT
(
SELECT id FROM should_keep_notes_ids_uniq
)
)
SELECT count(*) from should_delete_notes_ids;