first commit

This commit is contained in:
Gitea
2026-06-24 16:07:39 +05:30
commit 38c43c1cbb
11 changed files with 3837 additions and 0 deletions
+644
View File
@@ -0,0 +1,644 @@
import { execFile } from 'child_process';
import fsSync from 'fs';
import { createWriteStream } from 'fs';
import fs from 'fs/promises';
import path from 'path';
import { fileURLToPath } from 'url';
import { promisify } from 'util';
import zlib from 'zlib';

import { createCanvas } from '@napi-rs/canvas';
import archiver from 'archiver';
import dotenv from 'dotenv';
import express from 'express';
import multer from 'multer';
import { PDFDocument, PDFName, PDFRawStream } from 'pdf-lib';
import * as pdfjsLib from 'pdfjs-dist/legacy/build/pdf.mjs';
import sharp from 'sharp';
import { v4 as uuidv4 } from 'uuid';

import { authorize } from '../middleware/auth.js';

dotenv.config();
// Router that carries every upload / compress / download endpoint of this module.
const router = express.Router();

// ES modules have no built-in __dirname, so derive it from this module's URL.
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

// Working folders, located one level above this file.
const outputDir = path.join(__dirname, '..', 'outputs');
const sourceDir = path.join(__dirname, '..', 'source');

// Make sure both working folders exist before any route touches them.
// A failure is only logged; the module still finishes loading.
try {
  for (const workingDir of [outputDir, sourceDir]) {
    await fs.mkdir(workingDir, { recursive: true });
  }
} catch (err) {
  console.error('Error creating directories:', err);
}
/**
 * Multer disk storage: every upload is written into `sourceDir` under the
 * file name supplied by the client.
 */
const storage = multer.diskStorage({
  destination: (req, file, cb) => {
    cb(null, sourceDir);
  },
  filename: (req, file, cb) => {
    // The name comes from the client. Keep only its last path segment so a
    // crafted name can never resolve to a location outside `sourceDir`.
    cb(null, path.basename(file.originalname));
  },
});
// Upload size limit in bytes, taken from MAX_FILE_SIZE when that is a positive
// base-10 integer; anything else falls back to 50,000,000 bytes.
const configuredMaxFileSize = Number.parseInt(process.env.MAX_FILE_SIZE, 10);

/**
 * Multer instance shared by the upload routes.
 * Accepts JPEG, PNG, WebP and GIF images plus PDFs; any other MIME type is
 * rejected with an 'Invalid file type' error.
 */
const upload = multer({
  storage,
  fileFilter: (req, file, cb) => {
    const allowedMimes = [
      'image/jpeg',
      'image/png',
      'image/webp',
      'image/gif',
      'application/pdf',
    ];
    if (allowedMimes.includes(file.mimetype)) {
      cb(null, true);
    } else {
      cb(new Error('Invalid file type'));
    }
  },
  limits: { fileSize: configuredMaxFileSize > 0 ? configuredMaxFileSize : 50000000 },
});
/**
 * Remove every entry inside `dir`, leaving the directory itself in place.
 *
 * Uses `fs.rm` rather than `fs.unlink` so that nested directories are removed
 * too, and so that an entry deleted concurrently (for example by another
 * request's cleanup) does not make the whole operation fail.
 *
 * @param {string} dir - Directory whose contents are deleted.
 * @returns {Promise<void>} Resolves once every entry has been removed.
 * @throws If `dir` cannot be read or an entry cannot be removed.
 */
async function clearDirectory(dir) {
  const entries = await fs.readdir(dir);
  await Promise.all(
    entries.map((entry) => fs.rm(path.join(dir, entry), { recursive: true, force: true })),
  );
}
/**
 * Express middleware: empty the source folder, then hand over to the next
 * handler. Any failure is forwarded to Express through `next(err)`.
 */
function clearSourceDirectory(req, res, next) {
  return clearDirectory(sourceDir)
    .then(() => {
      next();
    })
    .catch((err) => {
      next(err);
    });
}
// Upload up to 100 files (form field "images") into the source folder.
// The folder is emptied first by clearSourceDirectory.
router.post(
  '/upload-multiple',
  clearSourceDirectory,
  upload.array('images', 100),
  async (req, res) => {
    try {
      const received = req.files;
      if (!received || received.length === 0) {
        return res.status(400).json({ error: 'No files uploaded' });
      }

      // Public URLs are built from the origin the client used for this request.
      const origin = `${req.protocol}://${req.get('host')}`;
      const files = [];
      for (const { originalname, size, filename } of received) {
        files.push({
          name: originalname,
          size,
          url: `${origin}/source/${encodeURIComponent(filename)}`,
        });
      }

      res.json({
        message: `${received.length} files uploaded successfully`,
        files,
      });
    } catch (err) {
      res.status(500).json({ error: err.message });
    }
  },
);
// Upload a single file (form field "image") into the source folder.
// The folder is emptied first by clearSourceDirectory.
router.post('/upload', clearSourceDirectory, upload.single('image'), async (req, res) => {
  try {
    const received = req.file;
    if (!received) {
      return res.status(400).json({ error: 'No file uploaded' });
    }

    // Public URL is built from the origin the client used for this request.
    const origin = `${req.protocol}://${req.get('host')}`;
    const file = {
      name: received.originalname,
      size: received.size,
      url: `${origin}/source/${encodeURIComponent(received.filename)}`,
    };

    res.json({ message: 'File uploaded successfully', file });
  } catch (err) {
    res.status(500).json({ error: err.message });
  }
});
// List the files currently in the source folder.
// Responds with { files: [{ name, size, url, uploadedAt, extension }] }.
router.get('/list', async (req, res) => {
  try {
    // Prefer the configured public URL. When BASE_URL is unset, derive the
    // origin from the request (as the upload routes do) instead of emitting
    // "undefined/source/..." links.
    const baseUrl = process.env.BASE_URL ?? `${req.protocol}://${req.get('host')}`;
    const files = await fs.readdir(sourceDir);
    const fileDetails = await Promise.all(
      files.map(async (file) => {
        const filePath = path.join(sourceDir, file);
        const stats = await fs.stat(filePath);
        return {
          name: file,
          size: stats.size,
          url: `${baseUrl}/source/${encodeURIComponent(file)}`,
          uploadedAt: stats.mtime,
          extension: path.extname(file).toLowerCase(),
        };
      }),
    );
    res.json({ files: fileDetails });
  } catch (err) {
    res.status(500).json({ error: err.message });
  }
});
/**
 * Compress the PDF at `inputPath` and write the result to `outputPath`.
 *
 * Strategy:
 *   1. Recompress embedded images in place (tryImageRecompression).
 *   2. If a size target is given and step 1 misses it, rasterize every page at
 *      progressively lower dpi / JPEG quality (rasterizeAndBuild) until the
 *      target is met. If it never is, the smallest result seen is written.
 *
 * @param {string} inputPath - Existing PDF to compress.
 * @param {string} outputPath - Destination file; its directory must exist.
 * @param {number} [quality=60] - Requested JPEG quality. Non-finite values
 *   (for example the NaN produced by `parseInt(undefined, 10)`) fall back to 60.
 * @param {number|string|null} [targetKb=null] - Desired maximum size in KB.
 *   Values that are not a positive finite number mean "no target".
 * @returns {Promise<void>}
 * @throws {Error} If the input file or output directory is missing, or if no
 *   non-empty output was produced.
 */
async function compressPdf(inputPath, outputPath, quality = 60, targetKb = null) {
  console.log('compressPdf called:', { inputPath, outputPath, quality, targetKb });

  if (!fsSync.existsSync(inputPath)) {
    throw new Error(`Input file not found: ${inputPath}`);
  }
  const outputDirPath = path.dirname(outputPath);
  if (!fsSync.existsSync(outputDirPath)) {
    throw new Error(`Output directory does not exist: ${outputDirPath}`);
  }

  // Callers hand over parseInt() results and raw request values, so normalize
  // here. A NaN quality would otherwise bypass the default parameter and make
  // every image re-encode fail.
  const requestedQuality = Number(quality);
  const effectiveQuality =
    quality != null && Number.isFinite(requestedQuality) ? requestedQuality : 60;
  const requestedKb = Number(targetKb);
  const targetBytes =
    targetKb != null && Number.isFinite(requestedKb) && requestedKb > 0
      ? requestedKb * 1024
      : null;

  // Asynchronous I/O: PDFs can be tens of megabytes and must not block the
  // event loop while other requests are being served.
  const inputBytes = await fs.readFile(inputPath);
  const originalSize = inputBytes.length;

  // Step 1: try image recompression first (keeps text selectable where possible).
  const firstAttempt = await tryImageRecompression(inputBytes, effectiveQuality);
  console.log(`Image-recompression attempt: ${firstAttempt.length} bytes`);
  if (!targetBytes || firstAttempt.length <= targetBytes) {
    await fs.writeFile(outputPath, firstAttempt);
    logResult(originalSize, firstAttempt.length, outputPath, targetBytes);
    return;
  }

  // Step 2: target not met, so rasterize pages to gain direct control over size.
  console.log('Target not met via image recompression — falling back to page rasterization.');
  const baseQuality = Math.max(15, Math.min(90, effectiveQuality));
  const attempts = [
    { dpi: 150, q: baseQuality },
    { dpi: 120, q: Math.max(15, baseQuality - 15) },
    { dpi: 96, q: Math.max(15, baseQuality - 30) },
    { dpi: 72, q: Math.max(15, baseQuality - 45) },
    { dpi: 72, q: 20 },
    { dpi: 50, q: 15 },
  ];

  let bestBuffer = null;
  for (const { dpi, q } of attempts) {
    console.log(`Rasterizing at dpi=${dpi}, jpegQuality=${q}`);
    const buffer = await rasterizeAndBuild(inputPath, dpi, q);
    console.log(` -> ${buffer.length} bytes`);
    if (!bestBuffer || buffer.length < bestBuffer.length) {
      bestBuffer = buffer;
    }
    if (buffer.length <= targetBytes) {
      await fs.writeFile(outputPath, buffer);
      console.log(`Target reached via rasterization at dpi=${dpi}, quality=${q}`);
      logResult(originalSize, buffer.length, outputPath, targetBytes);
      return;
    }
  }

  // No attempt met the target: keep whichever result is smallest overall.
  console.warn(`Could not fully reach target of ${targetKb}KB. Writing smallest achieved version.`);
  const finalBuffer = bestBuffer.length < firstAttempt.length ? bestBuffer : firstAttempt;
  await fs.writeFile(outputPath, finalBuffer);
  logResult(originalSize, finalBuffer.length, outputPath, targetBytes);
}
// --- Step 1 helper: recompress existing embedded JPEG/Flate images in place ---
/**
 * Re-encode the image XObjects of a PDF as (possibly downscaled) JPEGs and
 * return the re-serialized document. Page content other than images is not
 * touched.
 *
 * An image is only replaced when the new JPEG is smaller than the stored
 * stream. Images that cannot be re-encoded safely are left as they are:
 *   - soft masks (alpha channels) and images tied to a pre-blended (Matte) mask,
 *   - images with a /Decode array or a colour-key /Mask,
 *   - chained filters, streams under 2000 bytes,
 *   - Flate images that use /DecodeParms, are not 8 bits per component, or are
 *     not plain gray / RGB samples.
 *
 * @param {Uint8Array} inputBytes - Bytes of the source PDF.
 * @param {number} quality - Requested JPEG quality; clamped to 10..95 for
 *   encoding and also used to pick the downscale factor. Non-finite values
 *   fall back to 60.
 * @returns {Promise<Uint8Array>} Bytes of the rewritten PDF.
 */
async function tryImageRecompression(inputBytes, quality) {
  const pdfDoc = await PDFDocument.load(inputBytes, { ignoreEncryption: true, updateMetadata: false });
  const context = pdfDoc.context;
  const indirectObjects = context.enumerateIndirectObjects();
  const inflate = promisify(zlib.inflate);

  const effectiveQuality = Number.isFinite(quality) ? quality : 60;
  const scaleFactor =
    effectiveQuality >= 80 ? 1 : effectiveQuality >= 60 ? 0.85 : effectiveQuality >= 40 ? 0.65 : 0.5;
  const jpegQuality = Math.max(10, Math.min(95, Math.round(effectiveQuality)));

  // Streams referenced as /SMask are alpha channels. They are left untouched so
  // transparency keeps its exact values and its DeviceGray colour space.
  const softMaskRefs = new Set();
  for (const [, candidate] of indirectObjects) {
    if (!(candidate instanceof PDFRawStream)) continue;
    const softMask = candidate.dict.get(PDFName.of('SMask'));
    if (softMask) softMaskRefs.add(softMask.toString());
  }

  for (const [ref, obj] of indirectObjects) {
    try {
      if (!(obj instanceof PDFRawStream)) continue;
      if (softMaskRefs.has(ref.toString())) continue;

      const dict = obj.dict;
      const subtype = dict.get(PDFName.of('Subtype'));
      if (!subtype || subtype.toString() !== '/Image') continue;

      const filter = dict.get(PDFName.of('Filter'));
      const filterName = filter ? filter.toString() : '';
      // Chained filters would need every stage undone first; skip them.
      if ((filterName.match(/\//g) || []).length !== 1) continue;
      const isJpeg = filterName.includes('DCTDecode');
      const isFlate = filterName.includes('FlateDecode');
      if (!isJpeg && !isFlate) continue;

      const rawBytes = obj.contents;
      if (!rawBytes || rawBytes.length < 2000) continue;

      // A /Decode array remaps sample values and a colour-key /Mask needs exact
      // sample values; neither survives lossy re-encoding.
      if (dict.has(PDFName.of('Decode'))) continue;
      const mask = dict.get(PDFName.of('Mask'));
      if (mask && !(context.lookup(mask) instanceof PDFRawStream)) continue;

      // A soft mask with /Matte must keep the same pixel dimensions as its
      // parent image, so the parent must not be resized.
      const ownSoftMask = dict.get(PDFName.of('SMask'));
      if (ownSoftMask && context.lookup(ownSoftMask)?.dict?.has(PDFName.of('Matte'))) continue;

      let image;
      let sourceWidth;
      let sourceChannels;
      if (isJpeg) {
        image = sharp(Buffer.from(rawBytes), { failOn: 'none' });
        const metadata = await image.metadata();
        sourceWidth = metadata.width || 0;
        sourceChannels = metadata.channels;
      } else {
        // Predictor-coded data is not a plain grid of samples.
        if (dict.has(PDFName.of('DecodeParms'))) continue;

        const width = dict.get(PDFName.of('Width'))?.asNumber?.();
        const height = dict.get(PDFName.of('Height'))?.asNumber?.();
        const bpc = dict.get(PDFName.of('BitsPerComponent'))?.asNumber?.() || 8;
        if (!width || !height || bpc !== 8) continue;

        const csObj = dict.get(PDFName.of('ColorSpace'));
        const csName = csObj ? String(context.lookup(csObj)) : '/DeviceRGB';
        // Palette, spot-colour and CMYK samples cannot be handed over as gray/RGB.
        if (/\/(Indexed|Separation|DeviceN|Pattern|Lab|DeviceCMYK)\b/.test(csName)) continue;

        // The stored stream is still deflated; the pixel grid only exists after inflating.
        const pixels = await inflate(Buffer.from(rawBytes));
        const channels = pixels.length / (width * height);
        if (channels !== 1 && channels !== 3) continue;

        image = sharp(pixels, { raw: { width, height, channels }, failOn: 'none' });
        sourceWidth = width;
        sourceChannels = channels;
      }

      const newWidth = Math.round(sourceWidth * scaleFactor);
      const { data: compressedBuffer, info } = await image
        .resize({ width: newWidth > 0 ? newWidth : undefined, withoutEnlargement: true })
        // Pin the output colour model so the /ColorSpace written below matches the JPEG.
        .toColourspace(sourceChannels === 1 ? 'b-w' : 'srgb')
        .jpeg({ quality: jpegQuality, mozjpeg: true })
        .toBuffer({ resolveWithObject: true });

      if (compressedBuffer.length >= rawBytes.length) continue;
      if (info.channels !== 1 && info.channels !== 3) continue;

      dict.set(PDFName.of('Filter'), PDFName.of('DCTDecode'));
      dict.set(PDFName.of('Width'), context.obj(info.width));
      dict.set(PDFName.of('Height'), context.obj(info.height));
      dict.set(PDFName.of('ColorSpace'), PDFName.of(info.channels === 1 ? 'DeviceGray' : 'DeviceRGB'));
      dict.set(PDFName.of('BitsPerComponent'), context.obj(8));
      dict.delete(PDFName.of('DecodeParms'));
      // /SMask is kept on purpose: a soft mask is mapped onto the image's unit
      // square, so it stays valid after the image has been resized.
      context.assign(ref, PDFRawStream.of(dict, compressedBuffer));
    } catch (e) {
      // Best effort: an image that cannot be re-encoded stays as it was.
      console.warn(`Skipping image ${ref}: ${e.message}`);
    }
  }

  return await pdfDoc.save({ useObjectStreams: true });
}
// --- Step 2 helper: render every page to a JPEG via pdfjs-dist + napi-rs canvas, rebuild PDF ---
/**
 * Render each page of the PDF at `inputPath` to a JPEG and assemble a new PDF
 * that contains one full-page image per source page.
 *
 * `dpi` only controls how many pixels each page is rendered with. Every output
 * page keeps the source page's size in PDF points, so lowering the dpi shrinks
 * the file without changing the physical page dimensions.
 *
 * @param {string} inputPath - PDF to rasterize.
 * @param {number} dpi - Render resolution (72 dpi equals scale 1 in pdf.js).
 * @param {number} jpegQuality - JPEG quality passed to sharp.
 * @returns {Promise<Uint8Array>} Bytes of the rebuilt PDF.
 */
async function rasterizeAndBuild(inputPath, dpi, jpegQuality) {
  const data = new Uint8Array(await fs.readFile(inputPath));
  const loadingTask = pdfjsLib.getDocument({ data });
  try {
    const pdfDocument = await loadingTask.promise;
    const newPdf = await PDFDocument.create();
    const scale = dpi / 72; // pdfjs default is 72 DPI baseline

    for (let pageNum = 1; pageNum <= pdfDocument.numPages; pageNum++) {
      const page = await pdfDocument.getPage(pageNum);
      // Scale 1 yields the page size in points; used for the output page box.
      const pageSize = page.getViewport({ scale: 1 });
      const viewport = page.getViewport({ scale });

      const canvas = createCanvas(Math.ceil(viewport.width), Math.ceil(viewport.height));
      const ctx = canvas.getContext('2d');
      await page.render({
        canvasContext: ctx,
        viewport,
      }).promise;
      // Release the page's render resources before moving to the next one.
      page.cleanup();

      const pngBuffer = canvas.toBuffer('image/png');
      const compressed = await sharp(pngBuffer)
        .jpeg({ quality: jpegQuality, mozjpeg: true })
        .toBuffer();

      const jpgImage = await newPdf.embedJpg(compressed);
      const newPage = newPdf.addPage([pageSize.width, pageSize.height]);
      newPage.drawImage(jpgImage, {
        x: 0,
        y: 0,
        width: pageSize.width,
        height: pageSize.height,
      });
    }

    return await newPdf.save({ useObjectStreams: true });
  } finally {
    // This helper runs once per fallback attempt; without an explicit destroy
    // each parsed document (and its worker) would stay alive.
    await loadingTask.destroy();
  }
}
/**
 * Verify that a non-empty output file exists, then log the size reduction.
 *
 * @param {number} originalSize - Size of the input in bytes.
 * @param {number} newSize - Size of the produced output in bytes.
 * @param {string} outputPath - File that must exist and be non-empty.
 * @param {number|null} targetBytes - Optional size target; a warning is logged
 *   when `newSize` exceeds it.
 * @throws {Error} If `outputPath` is missing or empty.
 */
function logResult(originalSize, newSize, outputPath, targetBytes) {
  const outputIsUsable = fsSync.existsSync(outputPath) && fsSync.statSync(outputPath).size !== 0;
  if (!outputIsUsable) {
    throw new Error(`Compression ran but produced no/empty output at ${outputPath}`);
  }

  const savedFraction = 1 - newSize / originalSize;
  const pct = (savedFraction * 100).toFixed(1);
  console.log(`Done. ${originalSize} -> ${newSize} bytes (${pct}% reduction)`);

  const missedTarget = Boolean(targetBytes) && newSize > targetBytes;
  if (missedTarget) {
    console.warn(`Note: final size (${newSize} bytes) still exceeds target (${targetBytes} bytes).`);
  }
}
// Compress several files from the source folder into the output folder.
// Body: { filenames: string[], quality?: number|string, targetKb?: number|string }.
// Responds with one entry per requested name: { original, compressed, url, size }
// on success, or { original, error } when that file could not be processed.
router.post('/compress-multiple', async (req, res) => {
  try {
    const { filenames, quality, targetKb } = req.body;
    if (!Array.isArray(filenames) || filenames.length === 0) {
      return res.status(400).json({ error: 'No filenames provided' });
    }

    const parsedQuality = Number.parseInt(quality, 10);
    const parsedTargetKb = Number.parseInt(targetKb, 10);
    const desiredKb = parsedTargetKb > 0 ? parsedTargetKb : null;
    // `undefined` lets compressPdf apply its own default instead of receiving NaN.
    const pdfQuality = Number.isNaN(parsedQuality) ? undefined : parsedQuality;
    // sharp only accepts integer JPEG qualities from 1 to 100.
    const startQuality = Math.min(100, Math.max(1, parsedQuality || 80));

    // Names come from the request body: accept bare file names only, so a
    // value such as "../../x" can never address a path outside the folders.
    const isPlainFilename = (name) =>
      typeof name === 'string' &&
      name.length > 0 &&
      name !== '.' &&
      name !== '..' &&
      path.basename(name) === name;

    const encodeJpeg = (inputPath, outputPath, jpegQuality) =>
      sharp(inputPath)
        .resize(2048, 2048, {
          fit: 'inside',
          withoutEnlargement: true,
        })
        .jpeg({ quality: jpegQuality, progressive: true })
        .toFile(outputPath);

    const compressedFiles = await Promise.all(
      filenames.map(async (filename) => {
        try {
          if (!isPlainFilename(filename)) {
            throw new Error('Invalid filename');
          }
          const inputPath = path.join(sourceDir, filename);
          // NOTE: the output keeps the source name, so a re-encoded image holds
          // JPEG data even when its extension is .png/.webp/.gif.
          const outputFilename = filename;
          const outputPath = path.join(outputDir, outputFilename);
          const extension = path.extname(filename).toLowerCase();

          await fs.access(inputPath);

          if (extension === '.pdf') {
            await compressPdf(inputPath, outputPath, pdfQuality, desiredKb);
          } else {
            // Always encode once, so an output exists even when the starting
            // quality is already below the floor of 30. Then step the quality
            // down by 5 while the file is still above the target.
            let jpegQuality = startQuality;
            await encodeJpeg(inputPath, outputPath, jpegQuality);
            if (desiredKb) {
              const limitBytes = desiredKb * 1024;
              while ((await fs.stat(outputPath)).size > limitBytes && jpegQuality - 5 >= 30) {
                jpegQuality -= 5;
                await encodeJpeg(inputPath, outputPath, jpegQuality);
              }
            }
          }

          const stats = await fs.stat(outputPath);
          return {
            original: filename,
            compressed: outputFilename,
            url: `/outputs/${outputFilename}`,
            size: stats.size,
          };
        } catch (err) {
          return { original: filename, error: err.message };
        }
      }),
    );

    res.json({
      message: 'Files compressed successfully',
      files: compressedFiles,
    });
  } catch (err) {
    res.status(500).json({ error: err.message });
  }
});
// Compress one file from the source folder into the output folder.
// Body: { quality?: number|string, targetKb?: number|string }.
// Responds with { message, file: { filename, url, size } }; 400 for an invalid
// name, 404 when the source file does not exist.
router.post('/compress/:filename', async (req, res) => {
  try {
    const { filename } = req.params;
    const { quality, targetKb } = req.body;

    // Route params are URL-decoded, so "..%2F.." arrives as "../..". Accept
    // bare file names only so the paths below stay inside their folders.
    const isPlainFilename =
      typeof filename === 'string' &&
      filename.length > 0 &&
      filename !== '.' &&
      filename !== '..' &&
      path.basename(filename) === filename;
    if (!isPlainFilename) {
      return res.status(400).json({ error: 'Invalid filename' });
    }

    const inputPath = path.join(sourceDir, filename);
    // NOTE: the output keeps the source name, so a re-encoded image holds JPEG
    // data even when its extension is .png/.webp/.gif.
    const outputFilename = filename;
    const outputPath = path.join(outputDir, outputFilename);
    await fs.access(inputPath);

    const parsedQuality = Number.parseInt(quality, 10);
    const parsedTargetKb = Number.parseInt(targetKb, 10);
    const desiredKb = parsedTargetKb > 0 ? parsedTargetKb : null;

    const extension = path.extname(filename).toLowerCase();
    if (extension === '.pdf') {
      // `undefined` lets compressPdf apply its own default instead of receiving NaN.
      const pdfQuality = Number.isNaN(parsedQuality) ? undefined : parsedQuality;
      await compressPdf(inputPath, outputPath, pdfQuality, desiredKb);
    } else {
      const encodeJpeg = (jpegQuality) =>
        sharp(inputPath)
          .resize(2048, 2048, {
            fit: 'inside',
            withoutEnlargement: true,
          })
          .jpeg({ quality: jpegQuality, progressive: true })
          .toFile(outputPath);

      // Default to 80 when no usable quality was sent (same default as the
      // batch route); sharp only accepts integers from 1 to 100.
      let jpegQuality = Math.min(100, Math.max(1, parsedQuality || 80));
      // Always encode once so an output exists, then step the quality down by
      // 5 (never below 30) while the file is still above the target.
      await encodeJpeg(jpegQuality);
      if (desiredKb) {
        const limitBytes = desiredKb * 1024;
        while ((await fs.stat(outputPath)).size > limitBytes && jpegQuality - 5 >= 30) {
          jpegQuality -= 5;
          await encodeJpeg(jpegQuality);
        }
      }
    }

    const stats = await fs.stat(outputPath);
    res.json({
      message: 'File compressed successfully',
      file: {
        filename: outputFilename,
        url: `/outputs/${outputFilename}`,
        size: stats.size,
      },
    });
  } catch (err) {
    if (err.code === 'ENOENT') {
      return res.status(404).json({ error: 'Input file not found' });
    }
    res.status(500).json({ error: err.message });
  }
});
// Download one file from the output folder.
// After a successful transfer the output file is deleted and the source folder
// is emptied, so both folders end up clean.
router.get('/download/:filename', async (req, res) => {
  const { filename } = req.params;

  // Route params are URL-decoded, so "..%2F.." arrives as "../..". Accept bare
  // file names only: this route both serves and deletes the addressed file.
  const isPlainFilename =
    typeof filename === 'string' &&
    filename.length > 0 &&
    filename !== '.' &&
    filename !== '..' &&
    path.basename(filename) === filename;
  if (!isPlainFilename) {
    return res.status(400).json({ error: 'Invalid filename' });
  }

  const filePath = path.join(outputDir, filename);
  try {
    await fs.access(filePath);
  } catch (err) {
    return res.status(404).json({ error: 'File not found' });
  }

  res.download(filePath, filename, async (err) => {
    if (err && err.code !== 'ERR_HTTP_HEADERS_SENT') {
      console.error('Download error:', err);
      // If the transfer failed before anything was sent, answer the request
      // instead of leaving it hanging. The files are kept for a retry.
      if (!res.headersSent) {
        res.status(500).json({ error: 'Download failed' });
      }
      return;
    }

    // Auto-delete the downloaded output file.
    try {
      await fs.unlink(filePath);
      console.log(`File deleted: ${filename}`);
    } catch (delErr) {
      console.error('Delete error:', delErr);
    }

    // Also clear the source folder so both source and output end up empty.
    try {
      await clearDirectory(sourceDir);
      console.log('Source directory cleared after single file download');
    } catch (srcErr) {
      console.error('Could not clear source directory:', srcErr);
    }
  });
});
// router.get('/download/:filename', async (req, res) => {
// try {
// const { filename } = req.params;
// const filePath = path.join(outputDir, filename);
// await fs.access(filePath);
// res.download(filePath, filename, async (err) => {
// if (err && err.code !== 'ERR_HTTP_HEADERS_SENT') {
// console.error('Download error:', err);
// } else {
// // Auto-delete after successful download
// try {
// await fs.unlink(filePath);
// console.log(`File deleted: ${filename}`);
// } catch (delErr) {
// console.error('Delete error:', delErr);
// }
// }
// });
// } catch (err) {
// res.status(404).json({ error: 'File not found' });
// }
// });
// Download several output files bundled as one ZIP.
// Body: { filenames: string[] }.
// After a successful transfer the bundled output files and the temporary ZIP
// are deleted and the source folder is emptied.
router.post('/download-zip', async (req, res) => {
  try {
    const { filenames } = req.body;
    if (!Array.isArray(filenames) || filenames.length === 0) {
      return res.status(400).json({ error: 'No files specified' });
    }

    // Names come from the request body and are later both read and deleted:
    // accept bare file names only so nothing outside `outputDir` is reachable.
    const isPlainFilename = (name) =>
      typeof name === 'string' &&
      name.length > 0 &&
      name !== '.' &&
      name !== '..' &&
      path.basename(name) === name;
    if (!filenames.every(isPlainFilename)) {
      return res.status(400).json({ error: 'Invalid filename' });
    }

    const zipFilename = `download-${uuidv4()}.zip`;
    const zipPath = path.join(outputDir, zipFilename);
    const output = createWriteStream(zipPath);
    const archive = archiver('zip', { zlib: { level: 9 } });

    const removeZip = async () => {
      try {
        await fs.rm(zipPath, { force: true });
        console.log(`ZIP file deleted: ${zipFilename}`);
      } catch (delErr) {
        console.error('Delete error:', delErr);
      }
    };

    // Archiving can fail on either the archiver or the file stream. Respond
    // once, and make sure the 'close' handler does not start a download.
    let failed = false;
    const fail = (err) => {
      if (failed) return;
      failed = true;
      console.error('ZIP creation error:', err);
      output.destroy();
      void removeZip();
      if (!res.headersSent) {
        res.status(500).json({ error: err.message });
      }
    };
    archive.on('error', fail);
    output.on('error', fail);

    output.on('close', () => {
      if (failed) return;
      res.download(zipPath, zipFilename, async (err) => {
        if (err && err.code !== 'ERR_HTTP_HEADERS_SENT') {
          console.error('Download error:', err);
          // The ZIP is only a temporary artifact; the output files are kept
          // so the client can retry.
          await removeZip();
          if (!res.headersSent) {
            res.status(500).json({ error: 'Download failed' });
          }
          return;
        }

        // Auto-delete the bundled output files and the ZIP itself.
        for (const filename of filenames) {
          try {
            await fs.unlink(path.join(outputDir, filename));
            console.log(`Output file deleted: ${filename}`);
          } catch (deleteErr) {
            console.error(`Could not delete output file ${filename}:`, deleteErr);
          }
        }
        await removeZip();

        // Also clear the source folder so both source and output end up empty.
        try {
          await clearDirectory(sourceDir);
          console.log('Source directory cleared after zip download');
        } catch (srcErr) {
          console.error('Could not clear source directory:', srcErr);
        }
      });
    });

    archive.pipe(output);

    // Add every requested file that actually exists; missing ones are skipped.
    for (const filename of filenames) {
      const filePath = path.join(outputDir, filename);
      try {
        await fs.access(filePath);
        archive.file(filePath, { name: filename });
      } catch (err) {
        console.warn(`File not found: ${filename}`);
      }
    }

    await archive.finalize();
  } catch (err) {
    if (!res.headersSent) {
      res.status(500).json({ error: err.message });
    }
  }
});
// Delete one file from the output folder.
// Responds 400 for an invalid name, 404 when the file does not exist and 500
// for any other failure.
router.delete('/delete/:filename', async (req, res) => {
  try {
    const { filename } = req.params;

    // Route params are URL-decoded, so "..%2F.." arrives as "../..". Accept
    // bare file names only so nothing outside `outputDir` can be deleted.
    const isPlainFilename =
      typeof filename === 'string' &&
      filename.length > 0 &&
      filename !== '.' &&
      filename !== '..' &&
      path.basename(filename) === filename;
    if (!isPlainFilename) {
      return res.status(400).json({ error: 'Invalid filename' });
    }

    await fs.unlink(path.join(outputDir, filename));
    res.json({ message: 'File deleted successfully' });
  } catch (err) {
    if (err.code === 'ENOENT') {
      return res.status(404).json({ error: 'File not found' });
    }
    res.status(500).json({ error: err.message });
  }
});
// List the files currently in the output folder.
// Responds with { files: [{ name, size, url, createdAt }] }.
router.get('/outputs', async (req, res) => {
  try {
    // Prefer the configured public URL. When BASE_URL is unset, derive the
    // origin from the request (as the upload routes do) instead of emitting
    // "undefined/outputs/..." links.
    const baseUrl = process.env.BASE_URL ?? `${req.protocol}://${req.get('host')}`;
    const files = await fs.readdir(outputDir);
    const fileDetails = await Promise.all(
      files.map(async (file) => {
        const filePath = path.join(outputDir, file);
        const stats = await fs.stat(filePath);
        return {
          name: file,
          size: stats.size,
          url: `${baseUrl}/outputs/${encodeURIComponent(file)}`,
          createdAt: stats.birthtime,
        };
      }),
    );
    res.json({ files: fileDetails });
  } catch (err) {
    res.status(500).json({ error: err.message });
  }
});
export default router;