import express from 'express';
import path from 'path';
import fs from 'fs/promises';
import { fileURLToPath } from 'url';
import { execFile } from 'child_process';
import { promisify } from 'util';
import multer from 'multer';
import sharp from 'sharp';
import fsSync from 'fs';
import archiver from 'archiver';
import { v4 as uuidv4 } from 'uuid';
import { authorize } from '../middleware/auth.js';
import { createWriteStream } from 'fs';
import dotenv from 'dotenv';
dotenv.config();
import { PDFDocument, PDFName, PDFRawStream } from 'pdf-lib';
import { createCanvas } from '@napi-rs/canvas';
import * as pdfjsLib from 'pdfjs-dist/legacy/build/pdf.mjs';

const router = express.Router();

const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

// Create directories if they don't exist
const outputDir = path.join(__dirname, '..', 'outputs');
const sourceDir = path.join(__dirname, '..', 'source');

try {
  await fs.mkdir(outputDir, { recursive: true });
  await fs.mkdir(sourceDir, { recursive: true });
} catch (err) {
  console.error('Error creating directories:', err);
}

// Uploads are stored in the source folder under their original (base) name.
const storage = multer.diskStorage({
  destination: (req, file, cb) => {
    cb(null, sourceDir);
  },
  filename: (req, file, cb) => {
    // basename() is defence in depth: the stored name can never contain a
    // directory component, whatever the client sent.
    cb(null, path.basename(file.originalname));
  },
});

// Only images and PDFs are accepted; the size cap comes from MAX_FILE_SIZE
// (bytes) and falls back to 50,000,000 when unset or not a number.
const upload = multer({
  storage,
  fileFilter: (req, file, cb) => {
    const allowedMimes = [
      'image/jpeg',
      'image/png',
      'image/webp',
      'image/gif',
      'application/pdf',
    ];
    if (allowedMimes.includes(file.mimetype)) {
      cb(null, true);
    } else {
      cb(new Error('Invalid file type'));
    }
  },
  limits: {
    fileSize: Number.parseInt(process.env.MAX_FILE_SIZE, 10) || 50000000,
  },
});

/**
 * Remove every entry inside `dir`, leaving the directory itself in place.
 *
 * `fs.rm` with `recursive` + `force` is used instead of `fs.unlink` so that a
 * stray sub-directory, or an entry another request removed between the
 * `readdir` and the delete, does not make the whole clear fail.
 *
 * @param {string} dir - Directory whose contents are deleted.
 * @returns {Promise<void>}
 */
async function clearDirectory(dir) {
  const items = await fs.readdir(dir);
  await Promise.all(
    items.map((item) => fs.rm(path.join(dir, item), { recursive: true, force: true })),
  );
}

/**
 * Express middleware that empties the source folder before the next handler
 * runs; any failure is forwarded to the Express error handler.
 */
async function clearSourceDirectory(req, res, next) {
  try {
    await clearDirectory(sourceDir);
    next();
  } catch (err) {
    next(err);
  }
}

// Upload files into source folder, clearing it first
// Quality used for images when the request gives none (or an unparsable one).
const DEFAULT_IMAGE_QUALITY = 80;
// The target-size search never steps the JPEG quality below this value.
const MIN_IMAGE_QUALITY = 30;
// Quality used for PDFs when the caller gives none (or an unparsable one).
const DEFAULT_PDF_QUALITY = 60;

/**
 * Join a client-supplied file name onto `baseDir`, refusing anything that is
 * not a plain file name (path separators, `.`/`..`, NUL bytes, non-strings).
 *
 * @param {string} baseDir - Directory the file must live in.
 * @param {unknown} filename - Untrusted name from the request.
 * @returns {string|null} Absolute-ish joined path, or null when the name is unsafe.
 */
function resolveSafePath(baseDir, filename) {
  if (typeof filename !== 'string' || filename.length === 0) return null;
  if (filename === '.' || filename === '..') return null;
  if (filename.includes('\0') || filename.includes('/') || filename.includes('\\')) return null;
  if (path.basename(filename) !== filename) return null;
  return path.join(baseDir, filename);
}

/**
 * Parse a request value as a strictly positive base-10 integer.
 *
 * @param {unknown} value
 * @returns {number|null} The integer, or null when missing/invalid/non-positive.
 */
function parsePositiveInt(value) {
  const parsed = Number.parseInt(value, 10);
  return Number.isFinite(parsed) && parsed > 0 ? parsed : null;
}

/** Resolve to true when `targetPath` is accessible, false otherwise. */
async function pathExists(targetPath) {
  try {
    await fs.access(targetPath);
    return true;
  } catch {
    return false;
  }
}

/** Base URL for generated links: BASE_URL when configured, else the request's own origin. */
function getBaseUrl(req) {
  return process.env.BASE_URL || `${req.protocol}://${req.get('host')}`;
}

router.post('/upload-multiple', clearSourceDirectory, upload.array('images', 100), async (req, res) => {
  try {
    if (!req.files || req.files.length === 0) {
      return res.status(400).json({ error: 'No files uploaded' });
    }

    const baseUrl = `${req.protocol}://${req.get('host')}`;
    const uploadedFiles = req.files.map((file) => ({
      name: file.originalname,
      size: file.size,
      url: `${baseUrl}/source/${encodeURIComponent(file.filename)}`,
    }));

    res.json({
      message: `${req.files.length} files uploaded successfully`,
      files: uploadedFiles,
    });
  } catch (err) {
    res.status(500).json({ error: err.message });
  }
});

router.post('/upload', clearSourceDirectory, upload.single('image'), async (req, res) => {
  try {
    if (!req.file) {
      return res.status(400).json({ error: 'No file uploaded' });
    }

    const baseUrl = `${req.protocol}://${req.get('host')}`;
    res.json({
      message: 'File uploaded successfully',
      file: {
        name: req.file.originalname,
        size: req.file.size,
        url: `${baseUrl}/source/${encodeURIComponent(req.file.filename)}`,
      },
    });
  } catch (err) {
    res.status(500).json({ error: err.message });
  }
});

// Get source files list from root folder
router.get('/list', async (req, res) => {
  try {
    const baseUrl = getBaseUrl(req);
    const files = await fs.readdir(sourceDir);
    const fileDetails = await Promise.all(
      files.map(async (file) => {
        const stats = await fs.stat(path.join(sourceDir, file));
        return {
          name: file,
          size: stats.size,
          url: `${baseUrl}/source/${encodeURIComponent(file)}`,
          uploadedAt: stats.mtime,
          extension: path.extname(file).toLowerCase(),
        };
      }),
    );
    res.json({ files: fileDetails });
  } catch (err) {
    res.status(500).json({ error: err.message });
  }
});

/**
 * Compress a PDF and write the result to `outputPath`.
 *
 * Step 1 recompresses embedded images in place. When a size target is given
 * and step 1 misses it, step 2 re-renders every page as a JPEG at
 * progressively lower DPI/quality until the target is met. If nothing reaches
 * the target, the smallest candidate is written. The written file is never
 * larger than the input: when no candidate is smaller, the input bytes are
 * written unchanged.
 *
 * @param {string} inputPath - Existing PDF to read.
 * @param {string} outputPath - Destination file; its directory must exist.
 * @param {number} [quality=60] - JPEG quality hint; non-numeric values fall back to 60.
 * @param {number|string|null} [targetKb=null] - Desired maximum size in KB; falsy/invalid means "no target".
 * @returns {Promise<void>}
 * @throws {Error} When the input file or the output directory is missing, or
 *   when no output file was produced.
 */
async function compressPdf(inputPath, outputPath, quality = DEFAULT_PDF_QUALITY, targetKb = null) {
  console.log('compressPdf called:', { inputPath, outputPath, quality, targetKb });

  if (!(await pathExists(inputPath))) {
    throw new Error(`Input file not found: ${inputPath}`);
  }
  const outputDirPath = path.dirname(outputPath);
  if (!(await pathExists(outputDirPath))) {
    throw new Error(`Output directory does not exist: ${outputDirPath}`);
  }

  const numericQuality = Number(quality);
  const effectiveQuality = Number.isFinite(numericQuality) ? numericQuality : DEFAULT_PDF_QUALITY;
  const numericTargetKb = Number(targetKb);
  const targetBytes = numericTargetKb > 0 ? numericTargetKb * 1024 : null;

  const inputBytes = await fs.readFile(inputPath);
  const originalSize = inputBytes.length;

  // Step 1: try image-recompression first (keeps text selectable where possible)
  const firstAttempt = await tryImageRecompression(inputBytes, effectiveQuality);
  console.log(`Image-recompression attempt: ${firstAttempt.length} bytes`);

  // Re-saving can grow a file that had nothing to recompress; keep the input then.
  let best = firstAttempt.length < originalSize ? firstAttempt : inputBytes;

  if (!targetBytes || best.length <= targetBytes) {
    await fs.writeFile(outputPath, best);
    logResult(originalSize, best.length, outputPath, targetBytes);
    return;
  }

  // Step 2: target not met — rasterize pages to guarantee size control
  console.log('Target not met via image recompression — falling back to page rasterization.');

  const baseQuality = Math.max(15, Math.min(90, effectiveQuality));
  const attempts = [
    { dpi: 150, q: baseQuality },
    { dpi: 120, q: Math.max(15, baseQuality - 15) },
    { dpi: 96, q: Math.max(15, baseQuality - 30) },
    { dpi: 72, q: Math.max(15, baseQuality - 45) },
    { dpi: 72, q: 20 },
    { dpi: 50, q: 15 },
  ];

  for (const { dpi, q } of attempts) {
    console.log(`Rasterizing at dpi=${dpi}, jpegQuality=${q}`);
    let buffer;
    try {
      buffer = await rasterizeAndBuild(inputPath, dpi, q);
    } catch (err) {
      // A document the renderer cannot handle will fail at every DPI, so stop
      // here and fall through to writing the best candidate we already have.
      console.warn(`Rasterization failed at dpi=${dpi}: ${err.message}`);
      break;
    }
    console.log(`  -> ${buffer.length} bytes`);

    if (buffer.length < best.length) {
      best = buffer;
    }
    if (buffer.length <= targetBytes) {
      await fs.writeFile(outputPath, buffer);
      console.log(`Target reached via rasterization at dpi=${dpi}, quality=${q}`);
      logResult(originalSize, buffer.length, outputPath, targetBytes);
      return;
    }
  }

  console.warn(`Could not fully reach target of ${targetKb}KB. Writing smallest achieved version.`);
  await fs.writeFile(outputPath, best);
  logResult(originalSize, best.length, outputPath, targetBytes);
}

// --- Step 1 helper: recompress existing embedded JPEG/Flate images in place ---
/**
 * Re-encode the image XObjects of a PDF as (smaller) JPEGs and return the
 * re-saved document bytes.
 *
 * Handled inputs:
 *  - DCTDecode (JPEG) streams, decoded by sharp directly;
 *  - FlateDecode streams with 8 bits per component, no DecodeParms (i.e. no
 *    predictor), and a Gray/RGB-like colour space — these are inflated first
 *    and only used when the inflated size matches width × height × channels.
 * Everything else is left untouched. An image is only replaced when the new
 * encoding is smaller than the stored stream.
 *
 * @param {Uint8Array} inputBytes - The source PDF.
 * @param {number} quality - JPEG quality hint (clamped to 10–95); also selects the downscale factor.
 * @returns {Promise<Uint8Array>} The saved PDF.
 */
async function tryImageRecompression(inputBytes, quality) {
  // Function-scope import keeps this block self-contained (zlib is Node stdlib).
  const { inflateSync } = await import('zlib');

  const pdfDoc = await PDFDocument.load(inputBytes, {
    ignoreEncryption: true,
    updateMetadata: false,
  });
  const context = pdfDoc.context;

  const effectiveQuality = Number.isFinite(quality) ? quality : DEFAULT_PDF_QUALITY;
  const jpegQuality = Math.round(Math.max(10, Math.min(95, effectiveQuality)));
  const scaleFactor =
    effectiveQuality >= 80 ? 1 : effectiveQuality >= 60 ? 0.85 : effectiveQuality >= 40 ? 0.65 : 0.5;

  const colourSpaceByChannels = { 1: 'DeviceGray', 3: 'DeviceRGB', 4: 'DeviceCMYK' };

  for (const [ref, obj] of context.enumerateIndirectObjects()) {
    try {
      if (!(obj instanceof PDFRawStream)) continue;

      const dict = obj.dict;
      const subtype = dict.get(PDFName.of('Subtype'));
      if (!subtype || subtype.toString() !== '/Image') continue;

      // Accept a bare name or a one-element array; a filter chain would need
      // each stage undone in order, which is not attempted here.
      const filterText = dict.get(PDFName.of('Filter'))?.toString() ?? '';
      const filterNames = filterText.match(/\/[A-Za-z0-9]+/g) ?? [];
      if (filterNames.length !== 1) continue;
      const isJpeg = filterNames[0] === '/DCTDecode';
      const isFlate = filterNames[0] === '/FlateDecode';
      if (!isJpeg && !isFlate) continue;

      const rawBytes = obj.contents;
      if (!rawBytes || rawBytes.length < 2000) continue;

      let image;
      if (isJpeg) {
        image = sharp(Buffer.from(rawBytes), { failOn: 'none' });
      } else {
        const width = dict.get(PDFName.of('Width'))?.asNumber?.();
        const height = dict.get(PDFName.of('Height'))?.asNumber?.();
        const bpc = dict.get(PDFName.of('BitsPerComponent'))?.asNumber?.() || 8;
        if (!width || !height || bpc !== 8) continue;
        // DecodeParms usually means a PNG/TIFF predictor, which would have to
        // be reversed after inflating; skip rather than decode garbage.
        if (dict.get(PDFName.of('DecodeParms'))) continue;

        const csObj = dict.get(PDFName.of('ColorSpace'));
        const csName = csObj ? csObj.toString() : '/DeviceRGB';
        let channels;
        if (csName.includes('Gray')) channels = 1;
        else if (csName.includes('RGB') || csName.includes('ICCBased')) channels = 3;
        else continue; // CMYK, Indexed, Lab, ...: raw bytes are not plain Gray/RGB pixels

        // The stream holds deflated data, not pixels: inflate it, capping the
        // output so a hostile stream cannot expand without bound.
        const expectedSize = width * height * channels;
        const pixels = inflateSync(Buffer.from(rawBytes), { maxOutputLength: expectedSize });
        if (pixels.length !== expectedSize) continue;

        image = sharp(pixels, { raw: { width, height, channels } });
      }

      const metadata = await image.metadata();
      const newWidth = Math.round((metadata.width || 0) * scaleFactor);

      const { data: compressedBuffer, info } = await image
        .resize({ width: newWidth > 0 ? newWidth : undefined, withoutEnlargement: true })
        .jpeg({ quality: jpegQuality, mozjpeg: true })
        .toBuffer({ resolveWithObject: true });

      if (compressedBuffer.length >= rawBytes.length) continue;

      // Declare the colour space the new JPEG actually has; a greyscale JPEG
      // labelled DeviceRGB would not match its data.
      const colourSpace = colourSpaceByChannels[info.channels];
      if (!colourSpace) continue;

      dict.set(PDFName.of('Filter'), PDFName.of('DCTDecode'));
      dict.set(PDFName.of('Width'), context.obj(info.width));
      dict.set(PDFName.of('Height'), context.obj(info.height));
      dict.set(PDFName.of('ColorSpace'), PDFName.of(colourSpace));
      dict.set(PDFName.of('BitsPerComponent'), context.obj(8));
      dict.delete(PDFName.of('DecodeParms'));
      if (metadata.channels !== info.channels) {
        // A Decode array has two entries per component, so one written for the
        // old component count cannot apply to the new one.
        dict.delete(PDFName.of('Decode'));
      }
      // SMask is intentionally kept so transparency survives the re-encode.
      // NOTE(review): a soft mask with a /Matte entry is expected to match the
      // image dimensions — confirm whether such files occur in practice.

      context.assign(ref, PDFRawStream.of(dict, compressedBuffer));
    } catch (e) {
      // Best effort: an image that cannot be re-encoded stays as it was.
      console.warn(`Skipping image ${ref}: ${e.message}`);
    }
  }

  return await pdfDoc.save({ useObjectStreams: true });
}

// --- Step 2 helper: render every page to a JPEG via pdfjs-dist + napi-rs canvas, rebuild PDF ---
/**
 * Render each page of the PDF at `dpi`, JPEG-encode it, and build a new PDF
 * with one full-page image per page. Output pages keep the source page size
 * in points; only the pixel density of the embedded image varies with `dpi`.
 *
 * @param {string} inputPath - PDF to render.
 * @param {number} dpi - Render resolution (72 = scale 1).
 * @param {number} jpegQuality - JPEG quality passed to sharp.
 * @returns {Promise<Uint8Array>} The rebuilt PDF.
 */
async function rasterizeAndBuild(inputPath, dpi, jpegQuality) {
  // Fresh copy per call: the buffer handed to pdf.js is not reused afterwards.
  const data = new Uint8Array(await fs.readFile(inputPath));
  const loadingTask = pdfjsLib.getDocument({ data });

  try {
    const pdfDocument = await loadingTask.promise;
    const newPdf = await PDFDocument.create();
    const scale = dpi / 72; // pdfjs default is 72 DPI baseline

    for (let pageNum = 1; pageNum <= pdfDocument.numPages; pageNum++) {
      const page = await pdfDocument.getPage(pageNum);
      const pageSize = page.getViewport({ scale: 1 }); // size in PDF points
      const viewport = page.getViewport({ scale });

      const canvas = createCanvas(Math.ceil(viewport.width), Math.ceil(viewport.height));
      const ctx = canvas.getContext('2d');
      await page.render({ canvasContext: ctx, viewport }).promise;

      const pngBuffer = canvas.toBuffer('image/png');
      const compressed = await sharp(pngBuffer)
        .jpeg({ quality: jpegQuality, mozjpeg: true })
        .toBuffer();

      const jpgImage = await newPdf.embedJpg(compressed);
      // Page size comes from the source page, not the pixel size, so lowering
      // the DPI shrinks the image data without shrinking the page.
      const newPage = newPdf.addPage([pageSize.width, pageSize.height]);
      newPage.drawImage(jpgImage, {
        x: 0,
        y: 0,
        width: pageSize.width,
        height: pageSize.height,
      });
      page.cleanup();
    }

    return await newPdf.save({ useObjectStreams: true });
  } finally {
    // Release the pdf.js document/worker resources whether or not rendering succeeded.
    await loadingTask.destroy();
  }
}

/**
 * Verify that a non-empty output file exists and log the size reduction.
 *
 * @param {number} originalSize - Input size in bytes.
 * @param {number} newSize - Output size in bytes.
 * @param {string} outputPath - File that should have been written.
 * @param {number|null} targetBytes - Size target in bytes, if any.
 * @throws {Error} When the output file is missing or empty.
 */
function logResult(originalSize, newSize, outputPath, targetBytes) {
  if (!fsSync.existsSync(outputPath) || fsSync.statSync(outputPath).size === 0) {
    throw new Error(`Compression ran but produced no/empty output at ${outputPath}`);
  }
  // Guard the division so an empty input does not print NaN/Infinity.
  const pct = originalSize > 0 ? ((1 - newSize / originalSize) * 100).toFixed(1) : '0.0';
  console.log(`Done. ${originalSize} -> ${newSize} bytes (${pct}% reduction)`);
  if (targetBytes && newSize > targetBytes) {
    console.warn(`Note: final size (${newSize} bytes) still exceeds target (${targetBytes} bytes).`);
  }
}

/**
 * Re-encode an image as a progressive JPEG no larger than 2048×2048.
 *
 * With a size target, the quality is lowered in steps of 5 (never below
 * MIN_IMAGE_QUALITY) until the file fits; each pass overwrites the previous
 * one, so the smallest version is what remains if the target is never met.
 * At least one pass always runs, even when the starting quality is already
 * below the floor.
 *
 * @param {string} inputPath - Image to read.
 * @param {string} outputPath - Destination file.
 * @param {number} quality - Starting JPEG quality (clamped to 1–100).
 * @param {number|null} targetKb - Desired maximum size in KB, or null for a single pass.
 * @returns {Promise<void>}
 */
async function compressImage(inputPath, outputPath, quality, targetKb) {
  const encode = (jpegQuality) =>
    sharp(inputPath)
      .resize(2048, 2048, { fit: 'inside', withoutEnlargement: true })
      .jpeg({ quality: jpegQuality, progressive: true })
      .toFile(outputPath);

  let currentQuality = Math.max(1, Math.min(100, quality));
  if (!targetKb) {
    await encode(currentQuality);
    return;
  }

  const targetBytes = targetKb * 1024;
  for (;;) {
    const info = await encode(currentQuality);
    if (info.size <= targetBytes) return;
    if (currentQuality - 5 < MIN_IMAGE_QUALITY) return;
    currentQuality -= 5;
  }
}

/**
 * Compress one file from the source folder into the outputs folder under the
 * same name, choosing the PDF or image pipeline by extension.
 *
 * NOTE: images are always written as JPEG data, even though the output keeps
 * the source file's name and extension.
 *
 * @param {unknown} filename - Untrusted file name from the request.
 * @param {unknown} quality - Raw quality value from the request.
 * @param {unknown} targetKb - Raw target size (KB) from the request.
 * @returns {Promise<{ outputFilename: string, size: number }>}
 * @throws {Error} With `code === 'EINVALIDFILENAME'` for unsafe names, or the
 *   underlying fs/compression error (e.g. `ENOENT` when the input is missing).
 */
async function compressSourceFile(filename, quality, targetKb) {
  const inputPath = resolveSafePath(sourceDir, filename);
  const outputPath = resolveSafePath(outputDir, filename);
  if (!inputPath || !outputPath) {
    const err = new Error('Invalid filename');
    err.code = 'EINVALIDFILENAME';
    throw err;
  }

  await fs.access(inputPath);

  const requestedQuality = parsePositiveInt(quality);
  const desiredKb = parsePositiveInt(targetKb);

  if (path.extname(filename).toLowerCase() === '.pdf') {
    // undefined lets compressPdf apply its own default quality.
    await compressPdf(inputPath, outputPath, requestedQuality ?? undefined, desiredKb);
  } else {
    await compressImage(inputPath, outputPath, requestedQuality ?? DEFAULT_IMAGE_QUALITY, desiredKb);
  }

  const stats = await fs.stat(outputPath);
  return { outputFilename: filename, size: stats.size };
}

// Compress multiple files from source folder
router.post('/compress-multiple', async (req, res) => {
  try {
    const { filenames, quality, targetKb } = req.body;
    if (!filenames || !Array.isArray(filenames) || filenames.length === 0) {
      return res.status(400).json({ error: 'No filenames provided' });
    }

    // Per-file failures are reported in the result list instead of failing the batch.
    const compressedFiles = await Promise.all(
      filenames.map(async (filename) => {
        try {
          const { outputFilename, size } = await compressSourceFile(filename, quality, targetKb);
          return {
            original: filename,
            compressed: outputFilename,
            url: `/outputs/${outputFilename}`,
            size,
          };
        } catch (err) {
          return { original: filename, error: err.message };
        }
      }),
    );

    res.json({ message: 'Files compressed successfully', files: compressedFiles });
  } catch (err) {
    res.status(500).json({ error: err.message });
  }
});

// Compress single file from source folder
router.post('/compress/:filename', async (req, res) => {
  try {
    const { filename } = req.params;
    const { quality, targetKb } = req.body ?? {};

    const { outputFilename, size } = await compressSourceFile(filename, quality, targetKb);
    res.json({
      message: 'File compressed successfully',
      file: {
        filename: outputFilename,
        url: `/outputs/${outputFilename}`,
        size,
      },
    });
  } catch (err) {
    if (err.code === 'EINVALIDFILENAME') {
      return res.status(400).json({ error: err.message });
    }
    if (err.code === 'ENOENT') {
      return res.status(404).json({ error: 'Input file not found' });
    }
    res.status(500).json({ error: err.message });
  }
});

// Download single file
router.get('/download/:filename', async (req, res) => {
  const { filename } = req.params;
  const filePath = resolveSafePath(outputDir, filename);
  if (!filePath) {
    return res.status(400).json({ error: 'Invalid filename' });
  }
  if (!(await pathExists(filePath))) {
    return res.status(404).json({ error: 'File not found' });
  }

  res.download(filePath, filename, async (err) => {
    if (err) {
      // Keep the files so the client can retry a failed/aborted download.
      console.error('Download error:', err);
      if (!res.headersSent) {
        res.status(500).json({ error: 'Download failed' });
      }
      return;
    }

    // Auto-delete the downloaded output file
    try {
      await fs.unlink(filePath);
      console.log(`File deleted: ${filename}`);
    } catch (delErr) {
      console.error('Delete error:', delErr);
    }

    // ALSO clear the source folder so both source + output end up empty
    try {
      await clearDirectory(sourceDir);
      console.log('Source directory cleared after single file download');
    } catch (srcErr) {
      console.error('Could not clear source directory:', srcErr);
    }
  });
});

// Download all files as ZIP
router.post('/download-zip', async (req, res) => {
  try {
    const { filenames } = req.body;
    if (!filenames || !Array.isArray(filenames) || filenames.length === 0) {
      return res.status(400).json({ error: 'No files specified' });
    }

    // Keep only safe, existing, distinct names; these are also the only files
    // deleted after a successful download.
    const entries = [];
    const seen = new Set();
    for (const filename of filenames) {
      const filePath = resolveSafePath(outputDir, filename);
      if (!filePath) {
        console.warn(`Invalid filename skipped: ${filename}`);
        continue;
      }
      if (seen.has(filename)) continue;
      seen.add(filename);
      if (await pathExists(filePath)) {
        entries.push({ filename, filePath });
      } else {
        console.warn(`File not found: ${filename}`);
      }
    }
    if (entries.length === 0) {
      return res.status(404).json({ error: 'None of the requested files were found' });
    }

    const zipFilename = `download-${uuidv4()}.zip`;
    const zipPath = path.join(outputDir, zipFilename);
    const output = createWriteStream(zipPath);
    const archive = archiver('zip', { zlib: { level: 9 } });

    const removeZip = async () => {
      try {
        await fs.rm(zipPath, { force: true });
        console.log(`ZIP file deleted: ${zipFilename}`);
      } catch (delErr) {
        console.error('Delete error:', delErr);
      }
    };

    // Single failure path shared by the archiver and the write stream, so the
    // client gets exactly one response and no partial zip is left behind.
    let failed = false;
    const fail = async (err) => {
      if (failed) return;
      failed = true;
      console.error('ZIP creation error:', err);
      archive.abort();
      output.destroy();
      await removeZip();
      if (!res.headersSent) {
        res.status(500).json({ error: err.message });
      }
    };
    archive.on('error', fail);
    output.on('error', fail);

    output.on('close', () => {
      if (failed) return;
      res.download(zipPath, zipFilename, async (err) => {
        // The zip is a temporary artefact: remove it whatever the outcome.
        await removeZip();

        if (err) {
          console.error('Download error:', err);
          if (!res.headersSent) {
            res.status(500).json({ error: 'Download failed' });
          }
          return;
        }

        // Auto-delete the included output files
        for (const { filename, filePath } of entries) {
          try {
            await fs.unlink(filePath);
            console.log(`Output file deleted: ${filename}`);
          } catch (deleteErr) {
            console.error(`Could not delete output file ${filename}:`, deleteErr);
          }
        }

        // ALSO clear the source folder so both source + output end up empty
        try {
          await clearDirectory(sourceDir);
          console.log('Source directory cleared after zip download');
        } catch (srcErr) {
          console.error('Could not clear source directory:', srcErr);
        }
      });
    });

    archive.pipe(output);

    // Add files to archive
    for (const { filename, filePath } of entries) {
      archive.file(filePath, { name: filename });
    }
    await archive.finalize();
  } catch (err) {
    if (!res.headersSent) {
      res.status(500).json({ error: err.message });
    }
  }
});

// Delete file
router.delete('/delete/:filename', async (req, res) => {
  const filePath = resolveSafePath(outputDir, req.params.filename);
  if (!filePath) {
    return res.status(400).json({ error: 'Invalid filename' });
  }
  try {
    await fs.unlink(filePath);
    res.json({ message: 'File deleted successfully' });
  } catch (err) {
    if (err.code === 'ENOENT') {
      return res.status(404).json({ error: 'File not found' });
    }
    res.status(500).json({ error: err.message });
  }
});

// List output files
router.get('/outputs', async (req, res) => {
  try {
    const baseUrl = getBaseUrl(req);
    const files = await fs.readdir(outputDir);
    const fileDetails = await Promise.all(
      files.map(async (file) => {
        const stats = await fs.stat(path.join(outputDir, file));
        return {
          name: file,
          size: stats.size,
          url: `${baseUrl}/outputs/${encodeURIComponent(file)}`,
          createdAt: stats.birthtime,
        };
      }),
    );
    res.json({ files: fileDetails });
  } catch (err) {
    res.status(500).json({ error: err.message });
  }
});

export default router;