sách gpt4 ai đã đi

pdf - 将多个 PDF 合并为一个 PDF

In lại 作者:行者123 更新时间:2023-12-04 10:02:14 28 4
mua khóa gpt4 Nike

我的代码有一些问题。我正在尝试遍历包含许多 PDF 的 Drive 文件夹,然后将它们合并为一个文件。当我使用我的代码时,它只是为 Drive 文件夹中的最后一个 PDF 创建一个 PDF,而不是按预期将它们全部合并在一起。

/**
 * The asker's attempt at merging every PDF in a Drive folder into one file.
 *
 * As written it does not merge anything: it collects the raw bytes of each
 * file into an array and then builds the output from a single element of that
 * array, so the created file has the content of just one input file.
 */
function MergeFiles(){
var folder = DocsList.getFolderById('myFolderID');
var files = folder.getFiles();
var blobs = [];
// The loop has no braces, so only the single statement on the next line is its body.
for( var i in files )
blobs.push(files[i].getBlob().getBytes());
// Runs once, after the loop: pushes files[i] again (i still holds the last key
// visited) and logs the value returned by push(), i.e. the new array length.
Logger.log(blobs.push(files[i].getBlob().getBytes()));
// pop() returns only the most recently pushed byte array, so the new blob is
// built from one file's bytes rather than from all of them.
var myPDF = Utilities.newBlob(blobs.pop(), "application/pdf", "newPDF.pdf");
folder.createFile(myPDF);
}

1 个回答

因此,这不仅仅是简单地把每个文件中的数据拼接在一起。每个文件中真正有用的数据都被标记和其他代码“打包”起来(类似于 HTML 和其他文档格式)。您实际上必须解码每个 PDF 文件,组合必要的部分,然后用新的“打包”重新编码。这需要熟悉 PDF 规范和文件结构,相关规范可从 Adobe 免费获得。

我使用这些信息编写了一个足以满足我需要的脚本。然而,它并没有考虑到所有可能性;特别是,要合并任何需要 PDF-1.4 及更高版本特性的文档,还需要做相当多的工作。

/**
 * Merges all given PDF files into one.
 *
 * Each input file is parsed just far enough to find its page tree: the
 * classic cross-reference table is read, the catalog and page-tree root are
 * located, and every object reachable from the page tree is copied. The copied
 * objects are renumbered, their indirect references are rewritten to the new
 * numbers, every page is re-parented to a single new page-tree root, and a
 * fresh PDF-1.3 file (header, body, xref table, trailer) is written.
 *
 * Known limitations: only classic `xref` tables are understood (no
 * cross-reference or object streams, i.e. most PDF 1.5+ output), attributes a
 * page inherits from intermediate page-tree nodes are not carried over, and
 * encrypted files are not supported.
 *
 * @param {Folder} directory the folder to store the output file
 * @param {string} name the desired name of the output file ('.pdf' is appended if missing)
 * @param {File} pdf1 the first PDF file
 * @param {File} pdf2 the second PDF file
 * @param {File} opt_pdf3 [optional] the third PDF file; add as many more as you like
 *
 * @return {File} the merged file
 * @throws {Error} if an input has no startxref keyword, no classic trailer, a
 *     truncated object, or a catalog/page-tree object missing from the xref table
 */
function mergePdfs(directory, name, pdf1, pdf2, opt_pdf3) {
  if (name.slice(-4) != '.pdf') {
    name = name + '.pdf';
  }

  // Object 1 is the catalog. Objects 2 (page-tree root) and 3 (info) depend on
  // the collected pages and are spliced in once every input has been read.
  var newObjects = ['1 0 obj\r\n<</Type /Catalog /Pages 2 0 R>>\r\nendobj'];
  var pageAddresses = [];

  // Inputs are every argument after `directory` and `name`.
  for (var argumentIndex = 2; argumentIndex < arguments.length; argumentIndex++) {
    var sourceBytes = arguments[argumentIndex].getBlob().getBytes();
    var objectByteOffsets = [];
    var rootAddress = readCrossReferences(sourceBytes, objectByteOffsets);
    var rootObject = getObject(rootAddress, objectByteOffsets, sourceBytes);
    var pagesAddress = /\d+\s+\d+\s+R/.exec(/\/Pages\s*\d+\s+\d+\s+R/.exec(rootObject)[0])[0].replace('R', 'obj');
    var pagesObject = getObject(pagesAddress, objectByteOffsets, sourceBytes);
    var objects = getDependencies(pagesObject, objectByteOffsets, sourceBytes);

    // Pass 1: give every copied object its new number and remember old -> new.
    var records = [];
    var newReferences = {};
    for (var objectIndex = 0; objectIndex < objects.length; objectIndex++) {
      var hasStream = Array.isArray(objects[objectIndex]);
      var text = hasStream ? objects[objectIndex][0] : objects[objectIndex];
      var oldHeader = /(\d+)\s+(\d+)\s+obj/.exec(text);
      // +3 leaves room for objects 2 and 3, which are inserted later.
      var newNumber = newObjects.length + records.length + 3;
      var oldKey = (+oldHeader[1]) + ' ' + (+oldHeader[2]);
      if (!Object.prototype.hasOwnProperty.call(newReferences, oldKey)) {
        newReferences[oldKey] = newNumber + ' 0 R';
      }
      var isPage = /\/Type\s*\/Page[^s]/.test(text);
      if (isPage) {
        pageAddresses.push(newNumber + ' 0 R');
      }
      records.push({
        text: text.replace(oldHeader[0], newNumber + ' 0 obj'),
        stream: hasStream ? objects[objectIndex][1] : null,
        isPage: isPage
      });
    }

    // Pass 2: rewrite references. The /Parent of each page is set only AFTER
    // remapping, so the synthetic "2 0 R" can never be mistaken for a
    // reference to the source file's own object 2.
    for (var recordIndex = 0; recordIndex < records.length; recordIndex++) {
      var record = records[recordIndex];
      var remapped = remapReferences(record.text, newReferences);
      if (record.isPage) {
        remapped = remapped.replace(/\/Parent\s*\d+\s+\d+\s+R/, '/Parent 2 0 R');
      }
      newObjects.push(record.stream === null ? remapped : [remapped, record.stream]);
    }
  }

  newObjects.splice(1, 0, '2 0 obj\r\n<</Type /Pages /Count ' + pageAddresses.length +
      ' /Kids [' + pageAddresses.join(' ') + ']>>\r\nendobj');

  // PDF date strings have the form D:YYYYMMDDHHmmSSOHH'mm'.
  var stamp = Utilities.formatDate(new Date(), CalendarApp.getDefaultCalendar().getTimeZone(), 'yyyyMMddHHmmssZ');
  var pdfDate = 'D:' + stamp.slice(0, -2) + "'" + stamp.slice(-2) + "'";
  // Backslashes and parentheses must be escaped inside a PDF literal string.
  var title = name.replace(/([\\()])/g, '\\$1');
  newObjects.splice(2, 0, '3 0 obj\r\n<</Title (' + title + ') /Producer (mergePdfs) /CreationDate (' +
      pdfDate + ') /ModDate (' + pdfDate + ')>>\r\nendobj');

  // ---- Serialise the new file ----
  var outputBytes = [];
  var byteOffsets = [0]; // entry 0 is the mandatory free-list head
  appendText(outputBytes, '%PDF-1.3\r\n%');
  // Four bytes >= 0x80 (as signed values) in a comment mark the file as binary.
  for (var characterCode = -127; characterCode < -123; characterCode++) {
    outputBytes.push(characterCode);
  }
  appendText(outputBytes, '\r\n');

  while (newObjects.length > 0) {
    byteOffsets.push(outputBytes.length);
    var object = newObjects.shift();
    if (Array.isArray(object)) {
      // The stream data belongs right after the FIRST end-of-line following
      // the "stream" keyword; whatever follows in the text is the part that
      // came after the data ("endstream ... endobj").
      var streamKeyword = /stream[^\S\n]*\n/.exec(object[0]);
      var streamIndex = streamKeyword.index + streamKeyword[0].length;
      appendText(outputBytes, object[0].slice(0, streamIndex));
      for (var streamByteIndex = 0; streamByteIndex < object[1].length; streamByteIndex++) {
        outputBytes.push(object[1][streamByteIndex]);
      }
      appendText(outputBytes, object[0].slice(streamIndex));
    } else {
      appendText(outputBytes, object);
    }
    appendText(outputBytes, '\r\n');
  }

  var xrefByteOffset = outputBytes.length;
  appendText(outputBytes, 'xref\r\n0 ' + byteOffsets.length + '\r\n');
  for (var byteOffsetIndex = 0; byteOffsetIndex < byteOffsets.length; byteOffsetIndex++) {
    // Every entry is exactly 20 bytes: 10-digit offset, 5-digit generation, type, CRLF.
    var paddedOffset = ('0000000000' + byteOffsets[byteOffsetIndex]).slice(-10);
    appendText(outputBytes, paddedOffset + (byteOffsetIndex == 0 ? ' 65535 f' : ' 00000 n') + '\r\n');
  }

  var idBytes = Utilities.computeDigest(Utilities.DigestAlgorithm.MD5, (new Date).toString());
  var id = '';
  for (var idByteIndex = 0; idByteIndex < idBytes.length; idByteIndex++) {
    id = id + ('0' + (idBytes[idByteIndex] & 0xFF).toString(16)).slice(-2);
  }
  appendText(outputBytes, 'trailer\r\n<</Size ' + byteOffsets.length + ' /Root 1 0 R /Info 3 0 R /ID [<' + id +
      '> <' + id + '>]>>\r\nstartxref\r\n' + xrefByteOffset + '\r\n%%EOF');

  return directory.createFile(Utilities.newBlob(outputBytes, 'application/pdf', name));

  /**
   * Appends the characters of `text` to `target` as signed byte values.
   * Characters produced from negative source bytes come back as their
   * original byte value; plain ASCII is unchanged.
   */
  function appendText(target, text) {
    for (var textIndex = 0; textIndex < text.length; textIndex++) {
      target.push((text.charCodeAt(textIndex) << 24) >> 24);
    }
  }

  /**
   * Rewrites every "n g R" reference in `text` whose target was copied,
   * using the old-address -> new-reference table. Other references are kept.
   */
  function remapReferences(text, newReferences) {
    return text.replace(/(\d+)\s+(\d+)\s+R/g, function (reference, number, generation) {
      var key = (+number) + ' ' + (+generation);
      return Object.prototype.hasOwnProperty.call(newReferences, key) ? newReferences[key] : reference;
    });
  }

  /**
   * Reads the cross-reference table(s) of a file, newest first, following
   * /Prev links, and appends [[number, generation, 'obj'], byteOffset] entries
   * to `objectByteOffsets`.
   *
   * @return {string} the address of the catalog, e.g. "1 0 obj"
   */
  function readCrossReferences(bytes, objectByteOffsets) {
    // Scan backwards from the end of the file for "startxref <offset>".
    var tail = '';
    var byteIndex = bytes.length - 1;
    while (!/\sstartxref\s/.test(tail)) {
      if (byteIndex < 0) {
        throw new Error('mergePdfs: input is not a readable PDF (no startxref keyword found)');
      }
      tail = String.fromCharCode(bytes[byteIndex]) + tail;
      byteIndex--;
    }
    var xrefByteOffset = +(/\s\d+\s/.exec(tail)[0]);
    var rootAddress = '';
    var previous;
    do {
      var sectionEnd = byteIndex;
      var section = '';
      for (byteIndex = xrefByteOffset; byteIndex <= sectionEnd; byteIndex++) {
        section = section + String.fromCharCode(bytes[byteIndex]);
      }
      var parts = section.split(/\s*trailer\s*/);
      if (parts[1] === undefined) {
        throw new Error('mergePdfs: no classic xref table and trailer found ' +
            '(cross-reference streams are not supported)');
      }
      // For older sections the text runs on past the trailer; cut at startxref
      // so that /Prev is only ever looked up inside this trailer dictionary.
      var trailerDictionary = parts[1].split(/\s*startxref\s*/)[0];
      if (objectByteOffsets.length < 1) {
        rootAddress = /\d+\s+\d+\s+R/.exec(/\/Root\s*\d+\s+\d+\s+R/.exec(trailerDictionary)[0])[0].replace('R', 'obj');
      }
      var lines = parts[0].split('\n');
      lines.shift(); // the "xref" keyword line
      while (lines.length > 0) {
        var subsectionHeader = lines.shift().split(/\s+/);
        var objectNumber = +subsectionHeader[0];
        var objectCount = +subsectionHeader[1];
        for (var entryIndex = 0; entryIndex < objectCount; entryIndex++) {
          var entry = lines.shift().split(/\s+/);
          objectByteOffsets.push([[objectNumber, +entry[1], 'obj'], +entry[0]]);
          objectNumber++;
        }
      }
      previous = /\/Prev\s*(\d+)/.exec(trailerDictionary);
      if (previous !== null) {
        xrefByteOffset = +previous[1];
      }
    } while (previous !== null);
    return rootAddress;
  }

  /**
   * Returns the byte offset recorded for an object address
   * ([number, generation, 'obj']), or -1 when it is not in the table. The
   * first match wins, so entries from newer xref sections take precedence.
   */
  function findByteOffset(objectAddress, objectByteOffsets) {
    for (var offsetIndex = 0; offsetIndex < objectByteOffsets.length; offsetIndex++) {
      if (arrayEquals(objectAddress, objectByteOffsets[offsetIndex][0])) {
        return objectByteOffsets[offsetIndex][1];
      }
    }
    return -1;
  }

  /**
   * Extracts one indirect object from the file.
   *
   * @param {string} objectAddress address such as "12 0 obj"
   * @return {string|Array} the object's text up to and including "endobj";
   *     for a stream object, a two-element array [text, streamBytes] where
   *     `text` omits the stream data itself
   */
  function getObject(objectAddress, objectByteOffsets, bytes) {
    var addressParts = objectAddress.split(/\s+/);
    var byteIndex = findByteOffset([+addressParts[0], +addressParts[1], addressParts[2]], objectByteOffsets);
    if (byteIndex < 0) {
      throw new Error('mergePdfs: object "' + objectAddress + '" is not listed in the cross-reference table');
    }
    var text = '';
    while (text.indexOf('endobj') <= -1) {
      if (/stream\s*\n/.test(text)) {
        // Just read the end-of-line after "stream": byteIndex is on the first
        // data byte. Skip the data by its declared length instead of scanning
        // it, since it may contain anything (including "endobj").
        var lengthStart = text.indexOf('/Length');
        if (lengthStart < 0) {
          throw new Error('mergePdfs: stream object "' + objectAddress + '" has no /Length entry');
        }
        var lengthFinder = text.slice(lengthStart);
        var streamLength;
        var indirectLength = /\/Length\s*(\d+\s+\d+)\s+R/.exec(lengthFinder);
        if (indirectLength !== null) {
          // The length is stored in a separate object holding a bare integer.
          var lengthObject = getObject(indirectLength[1] + ' obj', objectByteOffsets, bytes);
          streamLength = +(/obj\s*(\d+)\s*endobj/.exec(lengthObject)[1]);
        } else {
          streamLength = +(/\d+/.exec(lengthFinder)[0]);
        }
        var streamBytes = bytes.slice(byteIndex, byteIndex + streamLength);
        byteIndex += streamLength;
        while (text.indexOf('endobj') <= -1) {
          if (byteIndex >= bytes.length) {
            throw new Error('mergePdfs: object "' + objectAddress + '" is not terminated by endobj');
          }
          text = text + String.fromCharCode(bytes[byteIndex]);
          byteIndex++;
        }
        return [text, streamBytes];
      }
      if (byteIndex >= bytes.length) {
        throw new Error('mergePdfs: object "' + objectAddress + '" is not terminated by endobj');
      }
      text = text + String.fromCharCode(bytes[byteIndex]);
      byteIndex++;
    }
    return text;
  }

  /**
   * Loose (==) element-wise comparison of two arrays, recursing into nested
   * arrays. Two null/undefined values compare equal.
   */
  function arrayEquals(array1, array2) {
    if (array1 == array2) {
      return true;
    }
    if (array1 == null || array2 == null) {
      return false;
    }
    if (array1.length != array2.length) {
      return false;
    }
    for (var index = 0; index < array1.length; index++) {
      if (Array.isArray(array1[index])) {
        if (!arrayEquals(array1[index], array2[index])) {
          return false;
        }
        continue;
      }
      if (array1[index] != array2[index]) {
        return false;
      }
    }
    return true;
  }

  /**
   * Collects, depth first and in order of first appearance, every object
   * reachable from `objectString` through indirect references, skipping
   * /Parent links.
   *
   * @param {Object} opt_visited [internal] addresses already collected; shared
   *     across the recursion so each object is returned once and reference
   *     cycles terminate
   * @return {Array} objects in the form returned by getObject
   */
  function getDependencies(objectString, objectByteOffsets, bytes, opt_visited) {
    var visited = opt_visited || {};
    var dependencies = [];
    var referencePattern = /(\d+)\s+(\d+)\s+R/g;
    var match;
    while ((match = referencePattern.exec(objectString)) !== null) {
      // Following /Parent would walk back up and pull in the whole page tree.
      var preceding = objectString.slice(Math.max(0, match.index - 32), match.index);
      if (/\/Parent\s*$/.test(preceding)) {
        continue;
      }
      var key = (+match[1]) + ' ' + (+match[2]);
      if (Object.prototype.hasOwnProperty.call(visited, key)) {
        continue;
      }
      visited[key] = true;
      // A reference to an object absent from the xref table is dangling; skip it.
      if (findByteOffset([+match[1], +match[2], 'obj'], objectByteOffsets) < 0) {
        continue;
      }
      var dependency = getObject(key + ' obj', objectByteOffsets, bytes);
      dependencies.push(dependency);
      var dependencyText = Array.isArray(dependency) ? dependency[0] : dependency;
      dependencies = dependencies.concat(getDependencies(dependencyText, objectByteOffsets, bytes, visited));
    }
    return dependencies;
  }
}

基本上这里发生的事情是每个文件都有包含其页面及其内容和资源的对象。然后这些对象被重新编号并使用新文件的新“包装”进行格式化。

我最初写这段代码是为了合并两个文件,但考虑到可能需要合并更多文件,所以让代码支持任意数量的文件。要使其适用于原问题,函数开头的这一部分:
function mergePdfs(directory, name, pdf1, pdf2, opt_pdf3) {

if (name.slice(-4) != '.pdf') {

name = name + '.pdf';

}
var newObjects = ['1 0 obj\r\n<>\r\nendobj'];
var pageAddresses = [];
for (var argumentIndex = 2; argumentIndex < arguments.length; argumentIndex++) {

var bytes = arguments[argumentIndex].getBlob().getBytes();

应该替换为
function mergePdfs(directory, name) {

if (name.slice(-4) != '.pdf') {

name = name + '.pdf';

}
var newObjects = ['1 0 obj\r\n<>\r\nendobj'];
var pageAddresses = [];
var files = directory.getFiles();
for (var fileIndex = 0; fileIndex < files.length; fileIndex++) {

var bytes = files[fileIndex].getBlob().getBytes();

关于pdf - 将多个 PDF 合并为一个 PDF,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/15414077/

28 4 0
Bài viết được đề xuất: Perl 默认变量 $_
Bài viết được đề xuất: r - 如何使 ggplot2 绘图更漂亮?
Bài viết được đề xuất: .NET 包管理
Bài viết được đề xuất: hash - 有人可以解释盐在存储散列密码时如何帮助吗?
行者123
Hồ sơ cá nhân

Tôi là một lập trình viên xuất sắc, rất giỏi!

Nhận phiếu giảm giá Didi Taxi miễn phí
Mã giảm giá Didi Taxi
Giấy chứng nhận ICP Bắc Kinh số 000000
Hợp tác quảng cáo: 1813099741@qq.com 6ren.com