From cf03179d87de721b453aba8dacf41261f6087c91 Mon Sep 17 00:00:00 2001 From: Jackson Harper Date: Wed, 30 Oct 2024 20:30:22 +0800 Subject: [PATCH 1/2] Disable YT transcript, add more export logging --- packages/api/src/jobs/export.ts | 4 +++ .../api/src/jobs/process-youtube-video.ts | 36 +++++++++---------- 2 files changed, 22 insertions(+), 18 deletions(-) diff --git a/packages/api/src/jobs/export.ts b/packages/api/src/jobs/export.ts index 41b15a84e0..8245eed4f0 100644 --- a/packages/api/src/jobs/export.ts +++ b/packages/api/src/jobs/export.ts @@ -127,6 +127,10 @@ const uploadToBucket = async ( export const exportJob = async (jobData: ExportJobData) => { const { userId, exportId } = jobData + logger.info('starting export job', { + userId, + exportId, + }) try { const user = await findActiveUser(userId) diff --git a/packages/api/src/jobs/process-youtube-video.ts b/packages/api/src/jobs/process-youtube-video.ts index 97d4fb0d58..2d704b0e83 100644 --- a/packages/api/src/jobs/process-youtube-video.ts +++ b/packages/api/src/jobs/process-youtube-video.ts @@ -281,24 +281,24 @@ export const processYouTubeVideo = async ( updatedLibraryItem.publishedAt = new Date(video.uploadDate) } - if ('getTranscript' in video && duration > 0 && duration < 1801) { - // If the video has a transcript available, put a placehold in and - // enqueue a job to process the full transcript - const updatedContent = await addTranscriptToReadableContent( - libraryItem.originalUrl, - libraryItem.readableContent, - TRANSCRIPT_PLACEHOLDER_TEXT - ) - - if (updatedContent) { - updatedLibraryItem.readableContent = updatedContent - } - - await enqueueProcessYouTubeTranscript({ - videoId, - ...jobData, - }) - } + // if ('getTranscript' in video && duration > 0 && duration < 1801) { + // // If the video has a transcript available, put a placehold in and + // // enqueue a job to process the full transcript + // const updatedContent = await addTranscriptToReadableContent( + // libraryItem.originalUrl, + // libraryItem.readableContent, + // TRANSCRIPT_PLACEHOLDER_TEXT + // ) + + // if (updatedContent) { + // updatedLibraryItem.readableContent = updatedContent + // } + + // await enqueueProcessYouTubeTranscript({ + // videoId, + // ...jobData, + // }) + // } if (updatedLibraryItem !== {}) { await updateLibraryItem( From 400e69465baed979f98db8947e1355d548384885 Mon Sep 17 00:00:00 2001 From: Jackson Harper Date: Wed, 30 Oct 2024 20:57:11 +0800 Subject: [PATCH 2/2] Disable non-essential jobs to give more resources to the exporter --- packages/api/src/queue-processor.ts | 60 ++++++++++++++--------------- 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/packages/api/src/queue-processor.ts b/packages/api/src/queue-processor.ts index 556809a646..62afd000a5 100644 --- a/packages/api/src/queue-processor.ts +++ b/packages/api/src/queue-processor.ts @@ -159,25 +159,25 @@ export const createWorker = (connection: ConnectionOptions) => async (job: Job) => { const executeJob = async (job: Job) => { switch (job.name) { - case 'refresh-all-feeds': { - const queue = await getQueue() - const counts = await queue?.getJobCounts('prioritized') - if (counts && counts.wait > 1000) { - return - } - return await refreshAllFeeds(appDataSource) - } - case 'refresh-feed': { - return await refreshFeed(job.data) - } + // case 'refresh-all-feeds': { + // const queue = await getQueue() + // const counts = await queue?.getJobCounts('prioritized') + // if (counts && counts.wait > 1000) { + // return + // } + // return await refreshAllFeeds(appDataSource) + // } + // case 'refresh-feed': { + // return await refreshFeed(job.data) + // } case 'save-page': { return savePageJob(job.data, job.attemptsMade) } - case 'update-pdf-content': { - return updatePDFContentJob(job.data) - } - case THUMBNAIL_JOB: - return findThumbnail(job.data) + // case 'update-pdf-content': { + // return updatePDFContentJob(job.data) + // } + // case THUMBNAIL_JOB: + // return findThumbnail(job.data) case TRIGGER_RULE_JOB_NAME: return triggerRule(job.data) case UPDATE_LABELS_JOB: @@ -192,12 +192,12 @@ export const createWorker = (connection: ConnectionOptions) => return callWebhook(job.data) case EXPORT_ITEM_JOB_NAME: return exportItem(job.data) - case AI_SUMMARIZE_JOB_NAME: - return aiSummarize(job.data) - case PROCESS_YOUTUBE_VIDEO_JOB_NAME: - return processYouTubeVideo(job.data) - case PROCESS_YOUTUBE_TRANSCRIPT_JOB_NAME: - return processYouTubeTranscript(job.data) + // case AI_SUMMARIZE_JOB_NAME: + // return aiSummarize(job.data) + // case PROCESS_YOUTUBE_VIDEO_JOB_NAME: + // return processYouTubeVideo(job.data) + // case PROCESS_YOUTUBE_TRANSCRIPT_JOB_NAME: + // return processYouTubeTranscript(job.data) case EXPORT_ALL_ITEMS_JOB_NAME: return exportAllItems(job.data) case SEND_EMAIL_JOB: @@ -210,16 +210,16 @@ export const createWorker = (connection: ConnectionOptions) => return saveNewsletterJob(job.data) case FORWARD_EMAIL_JOB: return forwardEmailJob(job.data) - case CREATE_DIGEST_JOB: - return createDigest(job.data) + // case CREATE_DIGEST_JOB: + // return createDigest(job.data) case UPLOAD_CONTENT_JOB: return uploadContentJob(job.data) - case UPDATE_HOME_JOB: - return updateHome(job.data) - case SCORE_LIBRARY_ITEM_JOB: - return scoreLibraryItem(job.data) - case GENERATE_PREVIEW_CONTENT_JOB: - return generatePreviewContent(job.data) + // case UPDATE_HOME_JOB: + // return updateHome(job.data) + // case SCORE_LIBRARY_ITEM_JOB: + // return scoreLibraryItem(job.data) + // case GENERATE_PREVIEW_CONTENT_JOB: + // return generatePreviewContent(job.data) case PRUNE_TRASH_JOB: return pruneTrashJob(job.data) case EXPIRE_FOLDERS_JOB_NAME: