Add code for revision cleanup (#6442)

* Add initial code for revision cleanup

* Some improvements - code cleanup

* Cleanup logging

* Add button in admin backend to cleanup revisions of a specific pad

* Disable cleanup by default and show errors in admin area

* Improve cleanup code

* Load revisions for cleanup in parallel

* Consider saved revisions during pad cleanup
This commit is contained in:
Stefan Müller 2024-09-14 15:54:30 +02:00 committed by GitHub
parent 08f199178d
commit 1ad9418a6f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
10 changed files with 283 additions and 3 deletions

View file

@ -14,6 +14,7 @@
"ep_adminpads2_autoupdate.title": "Aktiviert oder deaktiviert automatische Aktualisierungen für die aktuelle Abfrage.", "ep_adminpads2_autoupdate.title": "Aktiviert oder deaktiviert automatische Aktualisierungen für die aktuelle Abfrage.",
"ep_adminpads2_confirm": "Willst du das Pad {{padID}} wirklich löschen?", "ep_adminpads2_confirm": "Willst du das Pad {{padID}} wirklich löschen?",
"ep_adminpads2_delete.value": "Löschen", "ep_adminpads2_delete.value": "Löschen",
"ep_adminpads2_cleanup": "Historie aufräumen",
"ep_adminpads2_last-edited": "Zuletzt bearbeitet", "ep_adminpads2_last-edited": "Zuletzt bearbeitet",
"ep_adminpads2_loading": "Lädt...", "ep_adminpads2_loading": "Lädt...",
"ep_adminpads2_manage-pads": "Pads verwalten", "ep_adminpads2_manage-pads": "Pads verwalten",

View file

@ -4,6 +4,7 @@
"ep_adminpads2_autoupdate.title": "Enables or disables automatic updates for the current query.", "ep_adminpads2_autoupdate.title": "Enables or disables automatic updates for the current query.",
"ep_adminpads2_confirm": "Do you really want to delete the pad {{padID}}?", "ep_adminpads2_confirm": "Do you really want to delete the pad {{padID}}?",
"ep_adminpads2_delete.value": "Delete", "ep_adminpads2_delete.value": "Delete",
"ep_adminpads2_cleanup": "Cleanup revisions",
"ep_adminpads2_last-edited": "Last edited", "ep_adminpads2_last-edited": "Last edited",
"ep_adminpads2_loading": "Loading…", "ep_adminpads2_loading": "Loading…",
"ep_adminpads2_manage-pads": "Manage pads", "ep_adminpads2_manage-pads": "Manage pads",

View file

@ -6,7 +6,7 @@ import {useDebounce} from "../utils/useDebounce.ts";
import {determineSorting} from "../utils/sorting.ts"; import {determineSorting} from "../utils/sorting.ts";
import * as Dialog from "@radix-ui/react-dialog"; import * as Dialog from "@radix-ui/react-dialog";
import {IconButton} from "../components/IconButton.tsx"; import {IconButton} from "../components/IconButton.tsx";
import {ChevronLeft, ChevronRight, Eye, Trash2} from "lucide-react"; import {ChevronLeft, ChevronRight, Eye, Trash2, FileStack} from "lucide-react";
import {SearchField} from "../components/SearchField.tsx"; import {SearchField} from "../components/SearchField.tsx";
export const PadPage = ()=>{ export const PadPage = ()=>{
@ -23,6 +23,7 @@ export const PadPage = ()=>{
const pads = useStore(state=>state.pads) const pads = useStore(state=>state.pads)
const [currentPage, setCurrentPage] = useState<number>(0) const [currentPage, setCurrentPage] = useState<number>(0)
const [deleteDialog, setDeleteDialog] = useState<boolean>(false) const [deleteDialog, setDeleteDialog] = useState<boolean>(false)
const [errorText, setErrorText] = useState<string|null>(null)
const [padToDelete, setPadToDelete] = useState<string>('') const [padToDelete, setPadToDelete] = useState<string>('')
const pages = useMemo(()=>{ const pages = useMemo(()=>{
if(!pads){ if(!pads){
@ -68,12 +69,35 @@ export const PadPage = ()=>{
results: newPads results: newPads
}) })
}) })
// Answer to a 'cleanupPadRevisions' request: either shows the reported error or
// updates the revision number of the cleaned pad in the pad list.
settingsSocket.on('results:cleanupPadRevisions', (data)=>{
  if (data.error) {
    setErrorText(data.error)
    return
  }

  const currentPads = useStore.getState().pads
  // No pad list loaded, so there is no row to update (and no total to carry over).
  if (!currentPads) {
    return
  }

  // Build new pad objects instead of mutating the ones held by the store.
  const results = (currentPads.results ?? []).map((pad)=>
    pad.padName === data.padId
      ? {...pad, revisionNumber: data.keepRevisions}
      : pad)

  useStore.getState().setPads({
    results,
    total: currentPads.total
  })
})
}, [settingsSocket, pads]); }, [settingsSocket, pads]);
const deletePad = (padID: string)=>{ const deletePad = (padID: string)=>{
settingsSocket?.emit('deletePad', padID) settingsSocket?.emit('deletePad', padID)
} }
// Sends a 'cleanupPadRevisions' request for the given pad over the settings socket.
// Does nothing while no socket is available.
const cleanupPad = (padID: string)=>{
  if (settingsSocket == null) {
    return
  }
  settingsSocket.emit('cleanupPadRevisions', padID)
}
return <div> return <div>
@ -100,6 +124,21 @@ export const PadPage = ()=>{
</Dialog.Content> </Dialog.Content>
</Dialog.Portal> </Dialog.Portal>
</Dialog.Root> </Dialog.Root>
{/* Error dialog: open while errorText is set; the OK button resets it to null. */}
<Dialog.Root open={errorText !== null}>
  <Dialog.Portal>
    <Dialog.Overlay className="dialog-confirm-overlay"/>
    <Dialog.Content className="dialog-confirm-content">
      <div>
        <div>Error occurred: {errorText}</div>
        <div className="settings-button-bar">
          <button onClick={() => {
            setErrorText(null)
          }}>OK</button>
        </div>
      </div>
    </Dialog.Content>
  </Dialog.Portal>
</Dialog.Root>
<h1><Trans i18nKey="ep_admin_pads:ep_adminpads2_manage-pads"/></h1> <h1><Trans i18nKey="ep_admin_pads:ep_adminpads2_manage-pads"/></h1>
<SearchField value={searchTerm} onChange={v=>setSearchTerm(v.target.value)} placeholder={t('ep_admin_pads:ep_adminpads2_search-heading')}/> <SearchField value={searchTerm} onChange={v=>setSearchTerm(v.target.value)} placeholder={t('ep_admin_pads:ep_adminpads2_search-heading')}/>
<table> <table>
@ -150,6 +189,9 @@ export const PadPage = ()=>{
setPadToDelete(pad.padName) setPadToDelete(pad.padName)
setDeleteDialog(true) setDeleteDialog(true)
}}/> }}/>
<IconButton icon={<FileStack/>} title={<Trans i18nKey="ep_admin_pads:ep_adminpads2_cleanup"/>} onClick={()=>{
cleanupPad(pad.padName)
}}/>
<IconButton icon={<Eye/>} title="view" onClick={()=>window.open(`/p/${pad.padName}`, '_blank')}/> <IconButton icon={<Eye/>} title="view" onClick={()=>window.open(`/p/${pad.padName}`, '_blank')}/>
</div> </div>
</td> </td>

View file

@ -171,6 +171,14 @@
*/ */
"showSettingsInAdminPage": "${SHOW_SETTINGS_IN_ADMIN_PAGE:true}", "showSettingsInAdminPage": "${SHOW_SETTINGS_IN_ADMIN_PAGE:true}",
/*
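* Settings for the cleanup of pad revisions: "enabled" allows admins to trigger a cleanup, "keepRevisions" is the number of newest revisions kept (older ones are merged into one base revision)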
*/
"cleanup": {
"enabled": false,
"keepRevisions": 5
},
/* /*
The authentication method used by the server. The authentication method used by the server.
The default value is sso The default value is sso

View file

@ -162,6 +162,14 @@
*/ */
"showSettingsInAdminPage": true, "showSettingsInAdminPage": true,
/*
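* Settings for the cleanup of pad revisions: "enabled" allows admins to trigger a cleanup, "keepRevisions" is the number of newest revisions kept (older ones are merged into one base revision)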
*/
"cleanup": {
"enabled": false,
"keepRevisions": 5
},
/* /*
* Node native SSL support * Node native SSL support
* *

View file

@ -1147,7 +1147,7 @@ const getChangesetInfo = async (pad: PadType, startNum: number, endNum:number, g
getPadLines(pad, startNum - 1), getPadLines(pad, startNum - 1),
// Get all needed composite Changesets. // Get all needed composite Changesets.
...compositesChangesetNeeded.map(async (item) => { ...compositesChangesetNeeded.map(async (item) => {
const changeset = await composePadChangesets(pad, item.start, item.end); const changeset = await exports.composePadChangesets(pad, item.start, item.end);
composedChangesets[`${item.start}/${item.end}`] = changeset; composedChangesets[`${item.start}/${item.end}`] = changeset;
}), }),
// Get all needed revision Dates. // Get all needed revision Dates.
@ -1213,7 +1213,7 @@ const getPadLines = async (pad: PadType, revNum: number) => {
* Tries to rebuild the composePadChangeset function of the original Etherpad * Tries to rebuild the composePadChangeset function of the original Etherpad
* https://github.com/ether/pad/blob/master/etherpad/src/etherpad/control/pad/pad_changeset_control.js#L241 * https://github.com/ether/pad/blob/master/etherpad/src/etherpad/control/pad/pad_changeset_control.js#L241
*/ */
const composePadChangesets = async (pad: PadType, startNum: number, endNum: number) => { exports.composePadChangesets = async (pad: PadType, startNum: number, endNum: number) => {
// fetch all changesets we need // fetch all changesets we need
const headNum = pad.getHeadRevisionNumber(); const headNum = pad.getHeadRevisionNumber();
endNum = Math.min(endNum, headNum + 1); endNum = Math.min(endNum, headNum + 1);

View file

@ -13,6 +13,7 @@ const settings = require('../../utils/Settings');
const UpdateCheck = require('../../utils/UpdateCheck'); const UpdateCheck = require('../../utils/UpdateCheck');
const padManager = require('../../db/PadManager'); const padManager = require('../../db/PadManager');
const api = require('../../db/API'); const api = require('../../db/API');
const cleanup = require('../../utils/Cleanup');
const queryPadLimit = 12; const queryPadLimit = 12;
@ -252,6 +253,40 @@ exports.socketio = (hookName: string, {io}: any) => {
} }
}) })
// Admin request to clean up the revision history of one pad. Every outcome is
// answered on 'results:cleanupPadRevisions': {padId, keepRevisions} on success,
// {error} otherwise.
socket.on('cleanupPadRevisions', async (padId: string) => {
  if (!settings.cleanup.enabled) {
    socket.emit('results:cleanupPadRevisions', {
      error: 'Cleanup disabled. Enable cleanup in settings.json: cleanup.enabled => true',
    });
    return;
  }

  try {
    // The existence check lives inside the try block so that a failing lookup is
    // reported to the admin instead of becoming an unhandled rejection.
    const padExists = await padManager.doesPadExists(padId);
    if (!padExists) {
      // Without an answer the admin UI would get no feedback at all.
      socket.emit('results:cleanupPadRevisions', {
        error: `Pad does not exist: ${padId}`,
      });
      return;
    }

    logger.info(`Cleanup pad revisions: ${padId}`);
    const result = await cleanup.deleteRevisions(padId, settings.cleanup.keepRevisions)
    if (result) {
      socket.emit('results:cleanupPadRevisions', {
        padId: padId,
        keepRevisions: settings.cleanup.keepRevisions,
      });
      logger.info('successful cleaned up pad: ', padId)
    } else {
      socket.emit('results:cleanupPadRevisions', {
        error: 'Error cleaning up pad',
      });
    }
  } catch (err: any) {
    logger.error(`Error in pad ${padId}: ${err.stack || err}`);
    socket.emit('results:cleanupPadRevisions', {
      error: err.toString(),
    });
  }
})
socket.on('restartServer', async () => { socket.on('restartServer', async () => {
logger.info('Admin request to restart server through a socket on /admin/settings'); logger.info('Admin request to restart server through a socket on /admin/settings');
settings.reloadSettings(); settings.reloadSettings();

View file

@ -0,0 +1,9 @@
import {AChangeSet} from "./PadType";
/**
 * A single pad revision record: the changeset of that revision plus its metadata.
 */
export type Revision = {
  /** Changeset that this revision applies. */
  changeset: AChangeSet,
  meta: {
    /** ID of the author of the revision; may be an empty string. */
    author: string,
    /** Timestamp of the revision. */
    timestamp: number,
    /** Attribute pool snapshot; only written for key revisions. */
    pool?: unknown,
    /** Attributed text snapshot; only written for key revisions. */
    atext?: unknown,
  }
}

168
src/node/utils/Cleanup.ts Normal file
View file

@ -0,0 +1,168 @@
'use strict'
import {AChangeSet} from "../types/PadType";
import {Revision} from "../types/Revision";
const promises = require('./promises');
const padManager = require('ep_etherpad-lite/node/db/PadManager');
const db = require('ep_etherpad-lite/node/db/DB');
const Changeset = require('ep_etherpad-lite/static/js/Changeset');
const padMessageHandler = require('ep_etherpad-lite/node/handler/PadMessageHandler');
const log4js = require('log4js');
const logger = log4js.getLogger('cleanup');
/**
 * Discards the whole revision history of a pad by copying it without history
 * to a temporary pad and back again, then removing the temporary pad.
 *
 * @param padID - ID of the pad whose history is dropped.
 */
exports.deleteAllRevisions = async (padID: string): Promise<void> => {
  // Temporary pad ID: the original ID, a fixed marker and a random number.
  const temporaryPadId = `${padID}aertdfdf${Math.random().toString(10)}`;

  const originalPad = await padManager.getPad(padID);
  await originalPad.copyPadWithoutHistory(temporaryPadId, false);

  // Copy back onto the original ID (second argument is true here, unlike the
  // first copy) and remove the temporary pad afterwards.
  const temporaryPad = await padManager.getPad(temporaryPadId);
  await temporaryPad.copyPadWithoutHistory(padID, true);
  await temporaryPad.remove();
}
/**
 * Builds the record that is stored for one revision.
 *
 * @param aChangeset - Changeset of the revision.
 * @param timestamp - Timestamp stored in the revision metadata.
 * @param isKeyRev - When true, `pool` and `atext` are stored in the metadata too.
 * @param authorId - Author of the revision; a non-empty ID is registered in `pool`.
 * @param atext - Attributed text to store for key revisions.
 * @param pool - Attribute pool; receives the author attribute and is stored for key revisions.
 * @returns The revision record `{changeset, meta}`.
 */
const createRevision = async (aChangeset: AChangeSet, timestamp: number, isKeyRev: boolean, authorId: string, atext: any, pool: any) => {
  const hasAuthor = authorId !== '';
  if (hasAuthor) {
    pool.putAttrib(['author', authorId]);
  }

  const meta: {author: string, timestamp: number, pool?: any, atext?: any} = {
    author: authorId,
    timestamp,
  };
  if (isKeyRev) {
    meta.pool = pool;
    meta.atext = atext;
  }

  return {changeset: aChangeset, meta};
}
/**
 * Shrinks the revision history of a pad: all revisions up to
 * `head - keepRevisions` are composed into a single new revision 0, and the
 * remaining `keepRevisions` revisions are renumbered to follow it, so the new
 * head equals `keepRevisions`. Saved revisions that point into the squashed
 * range are dropped, the others are renumbered.
 *
 * Sessions on the pad are kicked before the history is rewritten.
 *
 * @param padId - ID of the pad to clean up.
 * @param keepRevisions - Number of newest revisions to keep; must be a non-negative integer.
 * @returns `true` when the history was rewritten, `false` when the pad's head
 *   is not greater than `keepRevisions`.
 * @throws RangeError when `keepRevisions` is not a non-negative integer.
 * @throws Error when a revision that must be kept cannot be loaded.
 */
exports.deleteRevisions = async (padId: string, keepRevisions: number): Promise<boolean> => {
  // Reject bad values before anything destructive happens: a negative,
  // fractional or non-numeric value would address revisions that do not exist
  // and fail only after the old revisions have already been removed.
  if (!Number.isInteger(keepRevisions) || keepRevisions < 0) {
    throw new RangeError(`keepRevisions must be a non-negative integer, got: ${keepRevisions}`);
  }

  logger.debug('Start cleanup revisions', padId)

  const pad = await padManager.getPad(padId);
  await pad.check()

  logger.debug('Initial pad is valid')

  if (pad.head <= keepRevisions) {
    logger.debug('Pad has not enough revisions')
    return false
  }

  padMessageHandler.kickSessionsFromPad(padId)

  // Last revision that gets merged into the new revision 0.
  const cleanupUntilRevision = pad.head - keepRevisions

  logger.debug('Composing changesets: ', cleanupUntilRevision)
  const changeset = await padMessageHandler.composePadChangesets(pad, 0, cleanupUntilRevision + 1)

  // Sparse array indexed by the ORIGINAL revision number; only
  // cleanupUntilRevision..head are populated.
  const revisions: Revision[] = [];

  await promises.timesLimit(keepRevisions + 1, 500, async (i: number) => {
    const rev = i + cleanupUntilRevision
    revisions[rev] = await pad.getRevision(rev)
  });

  // Make sure everything needed to rebuild the history is available before
  // the stored revisions are removed.
  for (let rev = cleanupUntilRevision; rev <= pad.head; rev++) {
    if (revisions[rev] == null) {
      throw new Error(`Revision ${rev} of pad ${padId} could not be loaded; aborting cleanup`);
    }
  }

  // revisions.length would report head + 1 because the array is sparse.
  logger.debug('Loaded revisions: ', keepRevisions + 1)

  await promises.timesLimit(pad.head + 1, 500, async (i: number) => {
    await db.remove(`pad:${padId}:revs:${i}`, null);
  });

  const padContent = await db.get(`pad:${padId}`)
  padContent.head = keepRevisions
  if (padContent.savedRevisions) {
    const newSavedRevisions = []

    for (let i = 0; i < padContent.savedRevisions.length; i++) {
      // Saved revisions inside the squashed range are dropped; the rest is
      // shifted to the new numbering.
      if (padContent.savedRevisions[i].revNum > cleanupUntilRevision) {
        padContent.savedRevisions[i].revNum = padContent.savedRevisions[i].revNum - cleanupUntilRevision
        newSavedRevisions.push(padContent.savedRevisions[i])
      }
    }
    padContent.savedRevisions = newSavedRevisions
  }
  await db.set(`pad:${padId}`, padContent);

  let newAText = Changeset.makeAText('\n');
  const pool = pad.apool()

  newAText = Changeset.applyToAText(changeset, newAText, pool);

  // New revision 0: the composed changeset, stamped with the time of the last
  // squashed revision and without an author.
  const revision = await createRevision(
      changeset,
      revisions[cleanupUntilRevision].meta.timestamp,
      0 === pad.getKeyRevisionNumber(0),
      '',
      newAText,
      pool
  );

  const p: Promise<void>[] = [];

  p.push(db.set(`pad:${padId}:revs:0`, revision))

  p.push(promises.timesLimit(keepRevisions, 500, async (i: number) => {
    const rev = i + cleanupUntilRevision + 1
    const newRev = rev - cleanupUntilRevision;
    // The changeset is applied synchronously at the start of each callback,
    // before the first await, so the running text is built up in index order.
    newAText = Changeset.applyToAText(revisions[rev].changeset, newAText, pool);

    const keptRevision = await createRevision(
        revisions[rev].changeset,
        revisions[rev].meta.timestamp,
        newRev === pad.getKeyRevisionNumber(newRev),
        revisions[rev].meta.author,
        newAText,
        pool
    );

    await db.set(`pad:${padId}:revs:${newRev}`, keptRevision);
  }));

  await Promise.all(p)

  logger.debug('Finished migration. Checking pad now')

  // Drop the cached pad object so the check runs against what is stored now.
  padManager.unloadPad(padId);

  const newPad = await padManager.getPad(padId);
  await newPad.check();

  return true
}
/**
 * Waits five seconds, then runs a revision cleanup on every pad that has at
 * least `minHead` revisions, has no connected users and whose head revision is
 * older than `minAge` milliseconds. Failures are logged per pad and do not
 * stop the other pads from being processed.
 */
exports.checkTodos = async () => {
  await new Promise(resolve => setTimeout(resolve, 5000));

  // TODO: Move to settings
  const settings = {
    minHead: 100,
    keepRevisions: 100,
    minAge: 1,//1000 * 60 * 60 * 24,
  }

  await Promise.all((await padManager.listAllPads()).padIDs.map(async (padId: string) => {
    // TODO: Handle concurrency
    try {
      // Loading the pad is inside the try block as well, so one broken pad
      // cannot reject the whole sweep.
      const pad = await padManager.getPad(padId);
      const revisionDate = await pad.getRevisionDate(pad.getHeadRevisionNumber())

      // padUsersCount() is expected to return an object of the form
      // {padUsersCount: number}; comparing that object with 0 is always false.
      // A plain number is accepted too, in case the helper's shape differs.
      const usersInfo = padMessageHandler.padUsersCount(padId);
      const activeUsers = typeof usersInfo === 'number' ? usersInfo : usersInfo?.padUsersCount ?? 0;

      if (pad.head < settings.minHead || activeUsers > 0 || Date.now() < revisionDate + settings.minAge) {
        return
      }

      const result = await exports.deleteRevisions(padId, settings.keepRevisions)
      if (result) {
        logger.info('successful cleaned up pad: ', padId)
      }
    } catch (err: any) {
      logger.error(`Error in pad ${padId}: ${err.stack || err}`);
    }
  }));
}

View file

@ -380,6 +380,14 @@ exports.sso = {
*/ */
exports.showSettingsInAdminPage = true; exports.showSettingsInAdminPage = true;
/*
 * Settings for the cleanup of pad revisions.
 *
 * enabled:       must be true before the admin backend accepts cleanup requests.
 * keepRevisions: number of newest revisions that are kept by a cleanup; older
 *                revisions are merged into a single base revision.
 *
 * NOTE(review): the default here is 100 while the shipped settings templates
 * use 5 - confirm which default is intended.
 */
exports.cleanup = {
enabled: false,
keepRevisions: 100,
}
/* /*
* By default, when caret is moved out of viewport, it scrolls the minimum * By default, when caret is moved out of viewport, it scrolls the minimum
* height needed to make this line visible. * height needed to make this line visible.