tdarr-plugs/Local/Tdarr_Plugin_05_subtitle_extraction.js

/* eslint-disable no-plusplus */
/**
 * Builds the static metadata object exported as `details`: the plugin's identity,
 * description, version, tags and the list of user-configurable inputs.
 *
 * @returns {object} A freshly created metadata object on every call.
 */
const details = () => {
    // All on/off settings are string dropdowns whose first option doubles as the default.
    const toggle = (name, options, tooltip) => ({
        name,
        type: 'string',
        defaultValue: options[0],
        inputUI: { type: 'dropdown', options },
        tooltip,
    });

    // Free-text input; an empty string means "no language filter".
    const languageFilter = {
        name: 'extract_languages',
        type: 'string',
        defaultValue: '',
        inputUI: { type: 'text' },
        tooltip: 'Comma-separated language codes to extract. Empty = extract all.',
    };

    return {
        id: 'Tdarr_Plugin_05_subtitle_extraction',
        Stage: 'Pre-processing',
        Name: '05 - Subtitle Extraction',
        Type: 'Video',
        Operation: 'Transcode',
        Description: `
  Extracts embedded subtitles to external .srt files.
  - Optionally removes embedded subtitles after extraction
  - Skips commentary/description tracks if configured
  - Skips image-based subtitles (PGS/VobSub - cannot extract to SRT)

  **Single Responsibility**: External file extraction only.
  Run AFTER subtitle conversion.
  `,
        Version: '4.0.0',
        Tags: 'action,ffmpeg,subtitles,srt,extract',
        Inputs: [
            toggle(
                'extract_subtitles',
                ['true*', 'false'],
                'Extract embedded text subtitles to external .srt files.',
            ),
            toggle(
                'remove_after_extract',
                ['false', 'true'],
                'Remove embedded subtitles from container after extracting them.',
            ),
            toggle(
                'skip_commentary',
                ['true*', 'false'],
                'Skip extracting subtitles with "commentary" or "description" in the title.',
            ),
            languageFilter,
        ],
    };
};

// Constants

// Subtitle codec names the plugin never extracts. The plugin description refers to
// these as image-based (PGS/VobSub) formats that cannot be written out as SRT.
const IMAGE_SUBTITLES = new Set(['hdmv_pgs_subtitle', 'dvd_subtitle', 'dvdsub']);
// Further codec names that are skipped during stream selection.
const UNSUPPORTED_SUBTITLES = new Set(['eia_608', 'cc_dec', 'tx3g']);
// An existing output file only counts as valid when its size is strictly greater
// than this value (compared against `fs.statSync(...).size` in fileExistsValid).
const MIN_SUBTITLE_SIZE = 100;
// Upper bound for the numeric suffix tried when looking for a free output filename.
const MAX_FILENAME_ATTEMPTS = 100;
// Names of the inputs whose string values the plugin converts to real booleans.
const BOOLEAN_INPUTS = ['extract_subtitles', 'remove_after_extract', 'skip_commentary'];

// Utilities
/**
 * Removes every `*` character from a string value (dropdown defaults are marked
 * with a trailing star, e.g. `'true*'`). Non-string values are returned untouched.
 *
 * @param {*} v - Value to clean.
 * @returns {*} The string without asterisks, or `v` itself when it is not a string.
 */
const stripStar = (v) => {
    if (typeof v !== 'string') {
        return v;
    }
    return v.split('*').join('');
};

/**
 * Turns arbitrary text into a string usable as a filename component.
 *
 * Reserved and control characters are replaced with `_`, leading and trailing
 * dots/whitespace are stripped, and the result is cut to `maxLen` characters.
 *
 * @param {*} name - Candidate text; anything that is not a string yields `'file'`.
 * @param {number} [maxLen=50] - Maximum length of the returned string.
 * @returns {string} The cleaned name, or `'file'` when nothing usable remains.
 */
const sanitizeFilename = (name, maxLen = 50) => {
    const fallback = 'file';
    if (typeof name !== 'string') {
        return fallback;
    }

    const withoutReserved = name.replace(/[<>:"|?*\/\\\x00-\x1f\x7f]/g, '_');
    const trimmed = withoutReserved.replace(/^[.\s]+|[.\s]+$/g, '');

    if (trimmed.length === 0) {
        return fallback;
    }
    return trimmed.substring(0, maxLen);
};

/**
 * Wraps a string in double quotes for use as a single argument in a shell command.
 *
 * NUL bytes are dropped, and every character that keeps a special meaning inside
 * POSIX double quotes is backslash-escaped: `\`, `"`, `$` and the backtick.
 * The backtick matters because it starts command substitution even inside
 * double quotes, so leaving it unescaped would let a crafted filename run commands.
 *
 * NOTE(review): the escaping follows POSIX shell rules; behaviour under Windows
 * `cmd.exe` has not been verified here.
 *
 * @param {string} str - Raw argument text.
 * @returns {string} The escaped text surrounded by double quotes.
 * @throws {TypeError} When `str` is not a string.
 */
const sanitizeForShell = (str) => {
    if (typeof str !== 'string') throw new TypeError('Input must be a string');
    const escaped = str
        .replace(/\0/g, '')
        // Single pass so backslashes added by the escaping are never escaped again.
        .replace(/[\\"$`]/g, '\\$&');
    return `"${escaped}"`;
};

/**
 * Reports whether a file is present and larger than `MIN_SUBTITLE_SIZE`.
 *
 * @param {string} filePath - Path handed to `fs.statSync`.
 * @param {object} fs - Object providing a `statSync(path)` method (normally Node's `fs`).
 * @returns {boolean} `true` only when the stat call succeeds and the reported size
 *   is strictly greater than the threshold; any thrown error yields `false`.
 */
const fileExistsValid = (filePath, fs) => {
    try {
        const { size } = fs.statSync(filePath);
        return size > MIN_SUBTITLE_SIZE;
    } catch {
        // A missing or unreadable file is simply "not valid".
        return false;
    }
};

/**
 * Plugin entry point. Writes each eligible embedded text subtitle stream to an
 * external `.srt` file and, when `remove_after_extract` is enabled and extraction
 * succeeded, returns an FFmpeg preset that drops those streams from the container.
 *
 * A stream is eligible when its codec is in neither IMAGE_SUBTITLES nor
 * UNSUPPORTED_SUBTITLES, its language passes the optional `extract_languages`
 * filter, and (with `skip_commentary` on) its title does not contain
 * "commentary" or "description".
 *
 * @param {object} file - File record; `container`, `file`, `_id` and
 *   `ffProbeData.streams` are read.
 * @param {object} librarySettings - Not used by this plugin.
 * @param {object} inputs - Raw plugin inputs, completed via `lib.loadDefaultValues`.
 * @param {object} [otherArguments] - Optional; `ffmpegPath` overrides the default
 *   `tdarr-ffmpeg` executable name.
 * @returns {object} Response with `processFile`, `preset`, `container`,
 *   `handbrakeMode`, `ffmpegMode`, `reQueueAfter` and `infoLog`. Errors are never
 *   thrown to the caller; they are reported through `infoLog`.
 */
const plugin = (file, librarySettings, inputs, otherArguments) => {
    const lib = require('../methods/lib')();
    const fs = require('fs');
    const path = require('path');
    const { execFileSync } = require('child_process');

    const response = {
        processFile: false,
        preset: '',
        container: `.${file.container}`,
        handbrakeMode: false,
        ffmpegMode: true,
        reQueueAfter: false,
        infoLog: '',
    };

    try {
        // Normalise inputs into a local object instead of rewriting the caller's one:
        // strip the "*" default markers, then turn the on/off inputs into booleans.
        const loaded = lib.loadDefaultValues(inputs, details);
        const settings = {};
        Object.keys(loaded).forEach((key) => { settings[key] = stripStar(loaded[key]); });
        BOOLEAN_INPUTS.forEach((key) => { settings[key] = settings[key] === 'true'; });

        if (!settings.extract_subtitles) {
            response.infoLog = '✅ Subtitle extraction disabled. ';
            return response;
        }

        const streams = file.ffProbeData?.streams;
        if (!Array.isArray(streams)) {
            response.infoLog = '❌ No stream data available. ';
            return response;
        }

        // Optional language allow-list; null means "extract every language".
        const extractLangs = settings.extract_languages
            ? new Set(settings.extract_languages.split(',').map((l) => l.trim().toLowerCase()).filter(Boolean))
            : null;

        // `index` is the position in the probe's stream array; it is what `-map 0:<n>` uses below.
        const subtitleStreams = streams
            .map((s, i) => ({ ...s, index: i }))
            .filter((s) => s.codec_type === 'subtitle');

        if (subtitleStreams.length === 0) {
            response.infoLog = '✅ No subtitle streams to extract. ';
            return response;
        }

        // A "-TdarrCacheFile-" marker in the id/path is treated as "this is a cache copy".
        // The marker is stripped so output names are derived from the stable name.
        const fileId = file._id || file.file;
        const isInCache = fileId.includes('-TdarrCacheFile-');
        const stableId = fileId.replace(/-TdarrCacheFile-[a-zA-Z0-9]+/, '');
        const basePath = path.join(path.dirname(file.file), path.basename(stableId, path.extname(stableId)));

        // Skip if in cache and NOT removing subtitles (prevents infinite loop)
        if (isInCache && !settings.remove_after_extract) {
            response.infoLog = 'ℹ️ In cache cycle, skipping to prevent loop. ';
            return response;
        }

        // Output paths already assigned in this run, so two streams never share a file.
        const claimedPaths = new Set();
        const isTaken = (candidate) => claimedPaths.has(candidate) || fileExistsValid(candidate, fs);
        const jobs = [];

        for (const stream of subtitleStreams) {
            const codec = (stream.codec_name || '').toLowerCase();
            if (UNSUPPORTED_SUBTITLES.has(codec) || IMAGE_SUBTITLES.has(codec)) continue;

            const lang = stream.tags?.language?.toLowerCase() || 'unknown';
            if (extractLangs && !extractLangs.has(lang)) continue;

            if (settings.skip_commentary) {
                const title = (stream.tags?.title || '').toLowerCase();
                if (title.includes('commentary') || title.includes('description')) continue;
            }

            // Try "<base>.<lang>.srt", then "<base>.<lang>.<n>.srt" for n = 1, 2, ...
            const safeLang = sanitizeFilename(lang);
            let outputPath = `${basePath}.${safeLang}.srt`;
            for (let n = 1; isTaken(outputPath) && n < MAX_FILENAME_ATTEMPTS; n += 1) {
                outputPath = `${basePath}.${safeLang}.${n}.srt`;
            }

            // Attempts exhausted: give up on this stream rather than overwrite an existing
            // file or reuse a path already assigned to another stream in this run.
            if (isTaken(outputPath)) continue;

            claimedPaths.add(outputPath);
            jobs.push({ index: stream.index, outputPath });
        }

        if (jobs.length === 0) {
            response.infoLog = '✅ No subtitles to extract (all exist or filtered). ';
            return response;
        }

        // One ffmpeg invocation with one "-map 0:<index> <file>" output per stream.
        // Arguments are passed as an array (no shell), so spaces or shell
        // metacharacters in the ffmpeg path or in filenames need no quoting.
        const ffmpegPath = otherArguments?.ffmpegPath || 'tdarr-ffmpeg';
        const ffmpegArgs = ['-y', '-i', file.file];
        jobs.forEach(({ index, outputPath }) => {
            ffmpegArgs.push('-map', `0:${index}`, outputPath);
        });

        const extractCount = jobs.length;
        response.infoLog += `✅ Extracting ${extractCount} subtitle(s)... `;

        try {
            execFileSync(ffmpegPath, ffmpegArgs, { stdio: 'pipe', timeout: 300000, maxBuffer: 10 * 1024 * 1024 });
            response.infoLog += 'Done. ';
        } catch (e) {
            const stderrText = e.stderr ? e.stderr.toString() : '';
            const errorMsg = stderrText || e.message;
            response.infoLog += `⚠️ Extraction failed: ${errorMsg}. `;
            // Never strip the embedded copies when the external files were not written:
            // that would destroy the only copy of the subtitles.
            if (settings.remove_after_extract) {
                response.infoLog += 'Embedded subtitles kept because extraction did not succeed. ';
            }
            return response;
        }

        // Remove subtitles from container if requested
        if (settings.remove_after_extract) {
            const unmapArgs = jobs.map(({ index }) => ` -map -0:${index}`).join('');
            response.preset = `<io> -map 0${unmapArgs} -c copy -max_muxing_queue_size 9999`;
            response.processFile = true;
            response.reQueueAfter = true;
            response.infoLog += `✅ Removing ${extractCount} embedded subtitle(s). `;
        } else {
            response.infoLog += '✅ Subtitles extracted, container unchanged. ';
        }

        // Final summary (only reached after a successful extraction of at least one stream).
        response.infoLog += '\n\n📋 Final Processing Summary:\n';
        response.infoLog += `  Subtitles extracted: ${extractCount}\n`;
        response.infoLog += settings.remove_after_extract
            ? '  - Embedded subtitles removed from container\n'
            : '  - Embedded subtitles preserved\n';

        return response;
    } catch (error) {
        // Any unexpected failure resets the response so the file is left untouched.
        response.processFile = false;
        response.preset = '';
        response.reQueueAfter = false;
        response.infoLog = `❌ Plugin error: ${error.message}\n`;
        return response;
    }
};

// CommonJS exports: the metadata factory and the processing entry point.
module.exports.details = details;
module.exports.plugin = plugin;