Some checks failed
		
		
	
	Types tests / Test (lts/*) (push) Has been cancelled
				
			Lint / Lint (lts/*) (push) Has been cancelled
				
			CodeQL / Analyze (javascript) (push) Has been cancelled
				
			CI / Test (20) (push) Has been cancelled
				
			CI / Test (22) (push) Has been cancelled
				
			CI / Test (24) (push) Has been cancelled
				
			
		
			
				
	
	
		
			77 lines
		
	
	
		
			2.4 KiB
		
	
	
	
		
			JavaScript
		
	
	
	
	
	
			
		
		
	
	
			77 lines
		
	
	
		
			2.4 KiB
		
	
	
	
		
			JavaScript
		
	
	
	
	
	
/* Any copyright is dedicated to the Public Domain.
 | 
						|
 * http://creativecommons.org/publicdomain/zero/1.0/ */
 | 
						|
 | 
						|
//
 | 
						|
// Basic node example that prints document metadata and text content.
 | 
						|
//
 | 
						|
 | 
						|
// Run `gulp dist-install` to generate 'pdfjs-dist' npm package files.
 | 
						|
import { getDocument } from "pdfjs-dist/legacy/build/pdf.mjs";
 | 
						|
 | 
						|
// Loading file from file system into typed array
 | 
						|
const pdfPath =
 | 
						|
  process.argv[2] || "../../web/compressed.tracemonkey-pldi-09.pdf";
 | 
						|
 | 
						|
// Will be using promises to load document, pages and misc data instead of
 | 
						|
// callback.
 | 
						|
const loadingTask = getDocument(pdfPath);
 | 
						|
loadingTask.promise
 | 
						|
  .then(function (doc) {
 | 
						|
    const numPages = doc.numPages;
 | 
						|
    console.log("# Document Loaded");
 | 
						|
    console.log("Number of Pages: " + numPages);
 | 
						|
    console.log();
 | 
						|
 | 
						|
    let lastPromise; // will be used to chain promises
 | 
						|
    lastPromise = doc.getMetadata().then(function (data) {
 | 
						|
      console.log("# Metadata Is Loaded");
 | 
						|
      console.log("## Info");
 | 
						|
      console.log(JSON.stringify(data.info, null, 2));
 | 
						|
      console.log();
 | 
						|
      if (data.metadata) {
 | 
						|
        console.log("## Metadata");
 | 
						|
        console.log(JSON.stringify(data.metadata.getAll(), null, 2));
 | 
						|
        console.log();
 | 
						|
      }
 | 
						|
    });
 | 
						|
 | 
						|
    const loadPage = function (pageNum) {
 | 
						|
      return doc.getPage(pageNum).then(function (page) {
 | 
						|
        console.log("# Page " + pageNum);
 | 
						|
        const viewport = page.getViewport({ scale: 1.0 });
 | 
						|
        console.log("Size: " + viewport.width + "x" + viewport.height);
 | 
						|
        console.log();
 | 
						|
        return page
 | 
						|
          .getTextContent()
 | 
						|
          .then(function (content) {
 | 
						|
            // Content contains lots of information about the text layout and
 | 
						|
            // styles, but we need only strings at the moment
 | 
						|
            const strings = content.items.map(function (item) {
 | 
						|
              return item.str;
 | 
						|
            });
 | 
						|
            console.log("## Text Content");
 | 
						|
            console.log(strings.join(" "));
 | 
						|
            // Release page resources.
 | 
						|
            page.cleanup();
 | 
						|
          })
 | 
						|
          .then(function () {
 | 
						|
            console.log();
 | 
						|
          });
 | 
						|
      });
 | 
						|
    };
 | 
						|
    // Loading of the first page will wait on metadata and subsequent loadings
 | 
						|
    // will wait on the previous pages.
 | 
						|
    for (let i = 1; i <= numPages; i++) {
 | 
						|
      lastPromise = lastPromise.then(loadPage.bind(null, i));
 | 
						|
    }
 | 
						|
    return lastPromise;
 | 
						|
  })
 | 
						|
  .then(
 | 
						|
    function () {
 | 
						|
      console.log("# End of Document");
 | 
						|
    },
 | 
						|
    function (err) {
 | 
						|
      console.error("Error: " + err);
 | 
						|
    }
 | 
						|
  );
 |