/* eslint-disable no-plusplus */

/**
 * Builds the plugin's static metadata: identity, description, version, tags
 * and the four user-configurable inputs.
 *
 * A fresh object is created on every call, so callers may mutate the result
 * without affecting later calls.
 *
 * Dropdown options carry a trailing `*` on the default choice (e.g. 'true*');
 * `plugin` strips that marker before interpreting the values.
 *
 * @returns {object} Plugin descriptor with `id`, `Stage`, `Name`, `Type`,
 *   `Operation`, `Description`, `Version`, `Tags` and `Inputs`.
 */
const details = () => ({
  id: 'Tdarr_Plugin_05_subtitle_extraction',
  Stage: 'Pre-processing',
  Name: '05 - Subtitle Extraction',
  Type: 'Video',
  Operation: 'Transcode',
  Description: `
Extracts embedded subtitles to external .srt files.
- Optionally removes embedded subtitles after extraction
- Skips commentary/description tracks if configured
- Skips image-based subtitles (PGS/VobSub - cannot extract to SRT)

**Single Responsibility**: External file extraction only.
Run AFTER subtitle conversion.
`,
  Version: '4.0.0',
  Tags: 'action,ffmpeg,subtitles,srt,extract',
  Inputs: [
    {
      // Master switch: when not 'true', the plugin returns without doing anything.
      name: 'extract_subtitles',
      type: 'string',
      defaultValue: 'true*',
      inputUI: { type: 'dropdown', options: ['true*', 'false'] },
      tooltip: 'Extract embedded text subtitles to external .srt files.',
    },
    {
      // When 'true', extracted streams are also dropped from the container.
      name: 'remove_after_extract',
      type: 'string',
      defaultValue: 'false',
      inputUI: { type: 'dropdown', options: ['false', 'true'] },
      tooltip: 'Remove embedded subtitles from container after extracting them.',
    },
    {
      // Matches against the stream title tag, case-insensitively.
      name: 'skip_commentary',
      type: 'string',
      defaultValue: 'true*',
      inputUI: { type: 'dropdown', options: ['true*', 'false'] },
      tooltip: 'Skip extracting subtitles with "commentary" or "description" in the title.',
    },
    {
      // Free-text, comma-separated; compared against the stream language tag.
      name: 'extract_languages',
      type: 'string',
      defaultValue: '',
      inputUI: { type: 'text' },
      tooltip: 'Comma-separated language codes to extract. Empty = extract all.',
    },
  ],
});

// Constants
/** Bitmap subtitle codecs; these are skipped because they cannot be written as .srt text. */
const IMAGE_SUBTITLES = new Set(['hdmv_pgs_subtitle', 'dvd_subtitle', 'dvdsub']);

/** Further subtitle codec names the plugin never attempts to extract. */
const UNSUPPORTED_SUBTITLES = new Set(['eia_608', 'cc_dec', 'tx3g']);

/** An existing subtitle file only counts as valid when its size in bytes exceeds this. */
const MIN_SUBTITLE_SIZE = 100;

/** Upper bound for the numeric suffix tried when an output filename is already taken. */
const MAX_FILENAME_ATTEMPTS = 100;

/** Input names whose string value ('true' / 'false') is converted to a real boolean. */
const BOOLEAN_INPUTS = ['extract_subtitles', 'remove_after_extract', 'skip_commentary'];

// Utilities
/**
 * Removes every `*` character from a string; any non-string value is
 * returned untouched.
 *
 * @param {*} v - Raw input value (e.g. the dropdown choice 'true*').
 * @returns {*} The string without asterisks, or `v` itself when it is not a string.
 */
const stripStar = (v) => (typeof v === 'string' ? v.replace(/\*/g, '') : v);

/**
 * Turns an arbitrary string into a safe filename fragment.
 *
 * Characters that are reserved in filenames (`< > : " | ? * / \`) and ASCII
 * control characters are replaced by `_`; leading and trailing dots and
 * whitespace are dropped; the result is limited to `maxLen` characters.
 *
 * @param {*} name - Candidate fragment; anything that is not a string yields 'file'.
 * @param {number} [maxLen=50] - Maximum length of the returned fragment.
 * @returns {string} The sanitized fragment, or 'file' when nothing usable remains.
 */
const sanitizeFilename = (name, maxLen = 50) => {
  if (typeof name !== 'string') return 'file';

  const trimEdges = (s) => s.replace(/^[.\s]+|[.\s]+$/g, '');

  const cleaned = trimEdges(name.replace(/[<>:"|?*\/\\\x00-\x1f\x7f]/g, '_'));

  // Cutting at maxLen can expose a dot or space at the new end of the string,
  // so the edges are trimmed once more after truncation.
  const clipped = trimEdges(cleaned.substring(0, maxLen));

  return clipped.length === 0 ? 'file' : clipped;
};

/**
 * Wraps a string in double quotes for use as a single shell argument.
 *
 * NUL bytes are removed, and every character that keeps a special meaning
 * inside POSIX double quotes -- backslash, double quote, dollar sign and
 * backtick -- is prefixed with a backslash. The backtick matters because an
 * unescaped one still starts command substitution inside double quotes.
 *
 * @param {string} str - Raw argument text.
 * @returns {string} The double-quoted, escaped argument.
 * @throws {TypeError} When `str` is not a string.
 */
const sanitizeForShell = (str) => {
  if (typeof str !== 'string') throw new TypeError('Input must be a string');

  const escaped = str
    .replace(/\0/g, '')
    // '\\$&' is a literal backslash followed by the matched character.
    .replace(/[\\"$`]/g, '\\$&');

  return `"${escaped}"`;
};

/**
 * Reports whether `filePath` exists and is larger than MIN_SUBTITLE_SIZE bytes.
 *
 * Best-effort by design: any error raised while inspecting the path (missing
 * file, permission problem, ...) is treated as "not a valid file".
 *
 * @param {string} filePath - Path to inspect.
 * @param {object} fs - Object providing `statSync` (the Node `fs` module in `plugin`).
 * @returns {boolean} True only when the stat succeeds and the size exceeds the threshold.
 */
const fileExistsValid = (filePath, fs) => {
  try {
    const { size } = fs.statSync(filePath);
    return size > MIN_SUBTITLE_SIZE;
  } catch {
    return false;
  }
};

/**
 * Plugin entry point: extracts embedded text subtitles into sidecar `.srt`
 * files next to the input and, when `remove_after_extract` is on, returns an
 * ffmpeg preset that drops the extracted streams from the container.
 *
 * Extraction is run synchronously here; the returned response only asks the
 * host to process the file when streams have to be removed.
 *
 * @param {object} file - File record; reads `container`, `file`, `_id` and
 *   `ffProbeData.streams`.
 * @param {object} librarySettings - Unused.
 * @param {object} inputs - Raw user inputs (see `details().Inputs`).
 * @param {object} [otherArguments] - Optional; `ffmpegPath` overrides the
 *   default 'tdarr-ffmpeg' executable.
 * @returns {object} Response with `processFile`, `preset`, `container`,
 *   `handbrakeMode`, `ffmpegMode`, `reQueueAfter` and `infoLog`. Unexpected
 *   errors are not thrown; they are reported through `infoLog`.
 */
const plugin = (file, librarySettings, inputs, otherArguments) => {
  const lib = require('../methods/lib')();
  const fs = require('fs');
  const path = require('path');
  const { execFileSync } = require('child_process');

  const response = {
    processFile: false,
    preset: '',
    container: `.${file.container}`,
    handbrakeMode: false,
    ffmpegMode: true,
    reQueueAfter: false,
    infoLog: '',
  };

  try {
    // Fill in defaults, drop the '*' default markers, then turn the
    // 'true' / 'false' strings into real booleans.
    inputs = lib.loadDefaultValues(inputs, details);
    Object.keys(inputs).forEach((k) => { inputs[k] = stripStar(inputs[k]); });
    BOOLEAN_INPUTS.forEach((k) => { inputs[k] = inputs[k] === 'true'; });

    if (!inputs.extract_subtitles) {
      response.infoLog = '✅ Subtitle extraction disabled. ';
      return response;
    }

    const streams = file.ffProbeData?.streams;
    if (!Array.isArray(streams)) {
      response.infoLog = '❌ No stream data available. ';
      return response;
    }

    // Language filter. A value that contains no actual code (empty, or only
    // commas/whitespace) means "extract all", as the input tooltip promises.
    const requestedLangs = new Set(
      String(inputs.extract_languages || '')
        .split(',')
        .map((l) => l.trim().toLowerCase())
        .filter(Boolean),
    );
    const extractLangs = requestedLangs.size > 0 ? requestedLangs : null;

    // `index` is the position in the probe's stream list; it is what the
    // `0:<index>` map specifiers below refer to.
    const subtitleStreams = streams
      .map((s, i) => ({ ...s, index: i }))
      .filter((s) => s.codec_type === 'subtitle');

    if (subtitleStreams.length === 0) {
      response.infoLog = '✅ No subtitle streams to extract. ';
      return response;
    }

    // Detect cache cycle
    const sourceId = file._id || file.file;
    const isInCache = sourceId.includes('-TdarrCacheFile-');
    const stableId = sourceId.replace(/-TdarrCacheFile-[a-zA-Z0-9]+/, '');
    // NOTE(review): the directory comes from file.file while the name comes
    // from the cache-suffix-free id; for a cache file this presumably places
    // the .srt in the cache directory -- confirm that is intended.
    const basePath = path.join(path.dirname(file.file), path.basename(stableId, path.extname(stableId)));

    // Skip if in cache and NOT removing subtitles (prevents infinite loop)
    if (isInCache && !inputs.remove_after_extract) {
      response.infoLog = 'ℹ️ In cache cycle, skipping to prevent loop. ';
      return response;
    }

    const plannedOutputs = new Set();
    const extractions = []; // { index, outFile } per stream to extract
    const isTaken = (p) => plannedOutputs.has(p) || fileExistsValid(p, fs);

    for (const stream of subtitleStreams) {
      const codec = (stream.codec_name || '').toLowerCase();

      // Skip unsupported
      if (UNSUPPORTED_SUBTITLES.has(codec) || IMAGE_SUBTITLES.has(codec)) continue;

      // Check language filter
      const lang = stream.tags?.language?.toLowerCase() || 'unknown';
      if (extractLangs && !extractLangs.has(lang)) continue;

      // Skip commentary
      if (inputs.skip_commentary) {
        const title = (stream.tags?.title || '').toLowerCase();
        if (title.includes('commentary') || title.includes('description')) continue;
      }

      // Build unique filename: <base>.<lang>.srt, then <base>.<lang>.<n>.srt
      const safeLang = sanitizeFilename(lang);
      let subsFile = `${basePath}.${safeLang}.srt`;
      let counter = 1;
      while (isTaken(subsFile) && counter < MAX_FILENAME_ATTEMPTS) {
        subsFile = `${basePath}.${safeLang}.${counter}.srt`;
        counter++;
      }

      // Attempts exhausted: never reuse a name that is on disk or that an
      // earlier stream of this run already claimed.
      if (isTaken(subsFile)) continue;

      plannedOutputs.add(subsFile);
      extractions.push({ index: stream.index, outFile: subsFile });
    }

    if (extractions.length === 0) {
      response.infoLog = '✅ No subtitles to extract (all exist or filtered). ';
      return response;
    }

    // Execute extraction. Arguments are handed to ffmpeg as an array, so no
    // shell parses the paths and no quoting or escaping is required.
    const ffmpegPath = otherArguments?.ffmpegPath || 'tdarr-ffmpeg';
    const ffmpegArgs = ['-y', '-i', file.file];
    extractions.forEach(({ index, outFile }) => {
      ffmpegArgs.push('-map', `0:${index}`, outFile);
    });

    let extractedCount = extractions.length;
    let streamsToRemove = extractions.map(({ index }) => index);

    response.infoLog += `✅ Extracting ${extractions.length} subtitle(s)... `;

    try {
      execFileSync(ffmpegPath, ffmpegArgs, { stdio: 'pipe', timeout: 300000, maxBuffer: 10 * 1024 * 1024 });
      response.infoLog += 'Done. ';
    } catch (e) {
      const errorMsg = e.stderr ? e.stderr.toString() : e.message;
      response.infoLog += `⚠️ Extraction failed: ${errorMsg}. `;
      if (!inputs.remove_after_extract) return response;

      // Dropping a stream whose sidecar was never written would destroy the
      // only copy, so after a failure only streams with a valid-looking
      // output file on disk stay eligible for removal.
      streamsToRemove = extractions
        .filter(({ outFile }) => fileExistsValid(outFile, fs))
        .map(({ index }) => index);
      extractedCount = streamsToRemove.length;

      if (streamsToRemove.length === 0) {
        response.infoLog += 'No subtitle file was written; embedded subtitles left untouched. ';
        return response;
      }
      response.infoLog += `Removing only the ${streamsToRemove.length} stream(s) whose .srt file was written. `;
    }

    // Remove subtitles from container if requested
    if (inputs.remove_after_extract && streamsToRemove.length > 0) {
      const unmapArgs = streamsToRemove.map((idx) => ` -map -0:${idx}`).join('');
      response.preset = `<io> -map 0${unmapArgs} -c copy -max_muxing_queue_size 9999`;
      response.processFile = true;
      response.reQueueAfter = true;
      response.infoLog += `✅ Removing ${streamsToRemove.length} embedded subtitle(s). `;
    } else {
      response.infoLog += '✅ Subtitles extracted, container unchanged. ';
    }

    // Final Summary block
    response.infoLog += '\n\n📋 Final Processing Summary:\n';
    response.infoLog += ` Subtitles extracted: ${extractedCount}\n`;
    response.infoLog += inputs.remove_after_extract
      ? ` - Embedded subtitles removed from container\n`
      : ` - Embedded subtitles preserved\n`;

    return response;
  } catch (error) {
    // Any unexpected failure leaves the file untouched and is only reported.
    response.processFile = false;
    response.preset = '';
    response.reQueueAfter = false;
    response.infoLog = `❌ Plugin error: ${error.message}\n`;
    return response;
  }
};

// Public surface of this module: the metadata factory and the entry point.
module.exports.details = details;
module.exports.plugin = plugin;