Files
tdarr-plugs/Local/Tdarr_Plugin_05_subtitle_extraction.js

241 lines
8.9 KiB
JavaScript
Raw Permalink Blame History

This file contains invisible Unicode characters
This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
/* eslint-disable no-plusplus */
/**
 * Returns the static metadata for this plugin: identity, stage, description,
 * version, tags and the list of user-configurable inputs.
 *
 * A fresh object is built on every call.
 *
 * @returns {object} Plugin descriptor with an `Inputs` array of four entries.
 */
const details = () => {
  // Builds a string-typed dropdown input whose default is its first option.
  const dropdownInput = (name, options, tooltip) => ({
    name,
    type: 'string',
    defaultValue: options[0],
    inputUI: { type: 'dropdown', options },
    tooltip,
  });

  // Leading and trailing empty entries reproduce the newline that opens and
  // closes the description text.
  const description = [
    '',
    'Extracts embedded subtitles to external .srt files.',
    '- Optionally removes embedded subtitles after extraction',
    '- Skips commentary/description tracks if configured',
    '- Skips image-based subtitles (PGS/VobSub - cannot extract to SRT)',
    '**Single Responsibility**: External file extraction only.',
    'Run AFTER subtitle conversion.',
    '',
  ].join('\n');

  const pluginInputs = [
    dropdownInput(
      'extract_subtitles',
      ['true*', 'false'],
      'Extract embedded text subtitles to external .srt files.',
    ),
    dropdownInput(
      'remove_after_extract',
      ['false', 'true'],
      'Remove embedded subtitles from container after extracting them.',
    ),
    dropdownInput(
      'skip_commentary',
      ['true*', 'false'],
      'Skip extracting subtitles with "commentary" or "description" in the title.',
    ),
    {
      name: 'extract_languages',
      type: 'string',
      defaultValue: '',
      inputUI: { type: 'text' },
      tooltip: 'Comma-separated language codes to extract. Empty = extract all.',
    },
  ];

  return {
    id: 'Tdarr_Plugin_05_subtitle_extraction',
    Stage: 'Pre-processing',
    Name: '05 - Subtitle Extraction',
    Type: 'Video',
    Operation: 'Transcode',
    Description: description,
    Version: '4.0.0',
    Tags: 'action,ffmpeg,subtitles,srt,extract',
    Inputs: pluginInputs,
  };
};
// Constants

// Image-based subtitle codecs; the plugin skips these because they cannot be
// written out as SRT text.
const IMAGE_SUBTITLES = new Set([
  'hdmv_pgs_subtitle',
  'dvd_subtitle',
  'dvdsub',
]);

// Further subtitle codecs the plugin skips during extraction.
const UNSUPPORTED_SUBTITLES = new Set([
  'eia_608',
  'cc_dec',
  'tx3g',
]);

// An existing output file only counts as valid when its size is greater than this.
const MIN_SUBTITLE_SIZE = 100;

// Upper bound on the numeric suffix tried when looking for an unused output name.
const MAX_FILENAME_ATTEMPTS = 100;

// Input names whose string values are converted to real booleans.
const BOOLEAN_INPUTS = [
  'extract_subtitles',
  'remove_after_extract',
  'skip_commentary',
];
// Utilities
/**
 * Removes every '*' character from a string value.
 *
 * @param {*} v - Any value; only strings are altered.
 * @returns {*} The string without asterisks, or `v` unchanged when it is not a string.
 */
const stripStar = (v) => {
  if (typeof v !== 'string') {
    return v;
  }
  return v.split('*').join('');
};
/**
 * Turns an arbitrary string into a safe filename fragment.
 *
 * Reserved and control characters are replaced with '_', leading and trailing
 * dots/whitespace are stripped, and the result is cut to `maxLen` characters.
 *
 * @param {*} name - Candidate name; non-strings yield the fallback.
 * @param {number} [maxLen=50] - Maximum length of the returned fragment.
 * @returns {string} The sanitized fragment, or 'file' when nothing usable remains.
 */
const sanitizeFilename = (name, maxLen = 50) => {
  if (typeof name !== 'string') return 'file';

  const cleaned = name
    .replace(/[<>:"|?*\/\\\x00-\x1f\x7f]/g, '_')
    .replace(/^[.\s]+|[.\s]+$/g, '');

  // Truncation can expose a new trailing dot or space (e.g. 'ab. cd' cut at 3),
  // so the tail is stripped again after cutting.
  const truncated = cleaned.substring(0, maxLen).replace(/[.\s]+$/, '');

  return truncated.length === 0 ? 'file' : truncated;
};
/**
 * Wraps a string in double quotes for use as one argument in a shell command.
 *
 * NUL bytes are dropped. Backslash, double quote, dollar sign and backtick are
 * backslash-escaped: these are the characters that stay special inside POSIX
 * double quotes. An unescaped backtick would start a command substitution.
 *
 * @param {string} str - Raw argument text.
 * @returns {string} The escaped text surrounded by double quotes.
 * @throws {TypeError} When `str` is not a string.
 */
const sanitizeForShell = (str) => {
  if (typeof str !== 'string') throw new TypeError('Input must be a string');

  const withoutNul = str.replace(/\0/g, '');
  // '$&' in the replacement stands for the matched character itself.
  const escaped = withoutNul.replace(/[\\"$`]/g, '\\$&');

  return `"${escaped}"`;
};
/**
 * Reports whether `filePath` can be stat'ed and is larger than
 * MIN_SUBTITLE_SIZE. Any error raised while stat'ing counts as "not valid".
 *
 * @param {string} filePath - Path to check.
 * @param {object} fs - Object providing a `statSync(path)` method.
 * @returns {boolean} True only when the reported size exceeds the threshold.
 */
const fileExistsValid = (filePath, fs) => {
  try {
    const { size } = fs.statSync(filePath);
    return size > MIN_SUBTITLE_SIZE;
  } catch {
    return false;
  }
};
/**
 * Plugin entry point: writes eligible embedded text subtitle streams to
 * external .srt files next to the source, and optionally builds an ffmpeg
 * preset that removes those streams from the container.
 *
 * @param {object} file - File record; reads `file.file` (path), `file._id`,
 *   `file.container` and `file.ffProbeData.streams`.
 * @param {object} librarySettings - Not used by this plugin.
 * @param {object} inputs - Raw input values; defaults are filled in through
 *   `lib.loadDefaultValues`.
 * @param {object} [otherArguments] - Optional; `ffmpegPath` is read from it.
 * @returns {object} Response with `processFile`, `preset`, `container`,
 *   `handbrakeMode`, `ffmpegMode`, `reQueueAfter` and `infoLog`.
 */
const plugin = (file, librarySettings, inputs, otherArguments) => {
  const lib = require('../methods/lib')();
  const fs = require('fs');
  const path = require('path');
  const { execFileSync } = require('child_process');

  const response = {
    processFile: false,
    preset: '',
    container: `.${file.container}`,
    handbrakeMode: false,
    ffmpegMode: true,
    reQueueAfter: false,
    infoLog: '',
  };

  try {
    // Fill in defaults, drop the '*' default markers, convert booleans.
    inputs = lib.loadDefaultValues(inputs, details);
    Object.keys(inputs).forEach((k) => { inputs[k] = stripStar(inputs[k]); });
    BOOLEAN_INPUTS.forEach((k) => { inputs[k] = inputs[k] === 'true'; });

    if (!inputs.extract_subtitles) {
      response.infoLog = '✅ Subtitle extraction disabled. ';
      return response;
    }

    const streams = file.ffProbeData?.streams;
    if (!Array.isArray(streams)) {
      response.infoLog = '❌ No stream data available. ';
      return response;
    }

    // Language filter: null means "no filter, extract every language".
    const extractLangs = inputs.extract_languages
      ? new Set(inputs.extract_languages.split(',').map((l) => l.trim().toLowerCase()).filter(Boolean))
      : null;

    // `index` is the stream's position in the probe array; it is used for -map.
    const subtitleStreams = streams
      .map((s, i) => ({ ...s, index: i }))
      .filter((s) => s.codec_type === 'subtitle');

    if (subtitleStreams.length === 0) {
      response.infoLog = '✅ No subtitle streams to extract. ';
      return response;
    }

    // A '-TdarrCacheFile-' marker in the id/path identifies a cache copy; it is
    // stripped so output names are derived from the stable base name.
    const fileId = file._id || file.file;
    const isInCache = fileId.includes('-TdarrCacheFile-');
    const stableId = fileId.replace(/-TdarrCacheFile-[a-zA-Z0-9]+/, '');
    const basePath = path.join(path.dirname(file.file), path.basename(stableId, path.extname(stableId)));

    // Skip if in cache and NOT removing subtitles (prevents infinite loop)
    if (isInCache && !inputs.remove_after_extract) {
      response.infoLog = ' In cache cycle, skipping to prevent loop. ';
      return response;
    }

    const extractedFiles = new Set();
    const extractArgs = [];
    const streamsToRemove = [];

    for (const stream of subtitleStreams) {
      const codec = (stream.codec_name || '').toLowerCase();

      // Skip codecs that are image-based or otherwise unsupported.
      if (UNSUPPORTED_SUBTITLES.has(codec) || IMAGE_SUBTITLES.has(codec)) continue;

      // Apply the language filter.
      const lang = stream.tags?.language?.toLowerCase() || 'unknown';
      if (extractLangs && !extractLangs.has(lang)) continue;

      // Skip commentary/description tracks when configured.
      if (inputs.skip_commentary) {
        const title = (stream.tags?.title || '').toLowerCase();
        if (title.includes('commentary') || title.includes('description')) continue;
      }

      // Find an output name that is neither planned in this run nor on disk.
      const safeLang = sanitizeFilename(lang);
      let subsFile = `${basePath}.${safeLang}.srt`;
      let counter = 1;
      while ((extractedFiles.has(subsFile) || fileExistsValid(subsFile, fs)) && counter < MAX_FILENAME_ATTEMPTS) {
        subsFile = `${basePath}.${safeLang}.${counter}.srt`;
        counter++;
      }

      // The loop can end on the attempt limit with a name that is still taken.
      // Both conditions are checked so two streams never share an output path.
      if (extractedFiles.has(subsFile) || fileExistsValid(subsFile, fs)) continue;

      extractArgs.push('-map', `0:${stream.index}`, subsFile);
      extractedFiles.add(subsFile);
      streamsToRemove.push(stream.index);
    }

    if (extractArgs.length === 0) {
      response.infoLog = '✅ No subtitles to extract (all exist or filtered). ';
      return response;
    }

    // Run ffmpeg directly with an argument array. No shell is involved, so
    // paths with quotes, backticks, '$' or spaces need no escaping.
    const ffmpegPath = otherArguments?.ffmpegPath || 'tdarr-ffmpeg';
    const ffmpegArgs = ['-y', '-i', file.file, ...extractArgs];

    const extractCount = streamsToRemove.length;
    response.infoLog += `✅ Extracting ${extractCount} subtitle(s)... `;

    let extractionSucceeded = true;
    try {
      execFileSync(ffmpegPath, ffmpegArgs, { stdio: 'pipe', timeout: 300000, maxBuffer: 10 * 1024 * 1024 });
      response.infoLog += 'Done. ';
    } catch (e) {
      extractionSucceeded = false;
      const errorMsg = e.stderr ? e.stderr.toString() : e.message;
      response.infoLog += `⚠️ Extraction failed: ${errorMsg}. `;
      if (!inputs.remove_after_extract) return response;
      // NOTE(review): removal proceeds even though extraction failed, which can
      // discard subtitles that were never written out - confirm this is intended.
      response.infoLog += 'Proceeding with removal regardless. ';
    }

    // Remove subtitles from container if requested
    if (inputs.remove_after_extract && streamsToRemove.length > 0) {
      const negativeMaps = streamsToRemove.map((idx) => ` -map -0:${idx}`).join('');
      response.preset = `<io> -map 0${negativeMaps} -c copy -max_muxing_queue_size 9999`;
      response.processFile = true;
      response.reQueueAfter = true;
      response.infoLog += `✅ Removing ${streamsToRemove.length} embedded subtitle(s). `;
    } else {
      response.infoLog += '✅ Subtitles extracted, container unchanged. ';
    }

    // Final Summary block
    if (extractCount > 0) {
      response.infoLog += '\n\n📋 Final Processing Summary:\n';
      // Report the real outcome: nothing was extracted when ffmpeg failed.
      response.infoLog += extractionSucceeded
        ? ` Subtitles extracted: ${extractCount}\n`
        : ' Subtitles extracted: 0 (extraction failed)\n';
      if (inputs.remove_after_extract) {
        response.infoLog += ` - Embedded subtitles removed from container\n`;
      } else {
        response.infoLog += ` - Embedded subtitles preserved\n`;
      }
    }

    return response;
  } catch (error) {
    // Any unexpected failure leaves the file untouched and reports the error.
    response.processFile = false;
    response.preset = '';
    response.reQueueAfter = false;
    response.infoLog = `❌ Plugin error: ${error.message}\n`;
    return response;
  }
};
// CommonJS exports: expose the metadata factory and the processing function
// under the names `details` and `plugin`.
module.exports.details = details;
module.exports.plugin = plugin;