You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
697 lines
16 KiB
697 lines
16 KiB
/**
 * @licstart The following is the entire license notice for the
 * Javascript code in this page
 *
 * Copyright 2020 Mozilla Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * @licend The above is the entire license notice for the
 * Javascript code in this page
 */
|
|
"use strict";
|
|
|
|
// Mark this CommonJS module as a transpiled ES module so that interop
// helpers in consumers treat its exports as named exports.
Object.defineProperty(exports, "__esModule", {
  value: true
});
// Declare the export slots up front; they are assigned their real values
// further down, once the corresponding definitions have been evaluated.
exports.PDFFindController = exports.FindState = void 0;

// Supplies `createPromiseCapability`.
var _pdf = require("../pdf");

// Supplies `getCharacterType`, used for entire-word matching.
var _pdf_find_utils = require("./pdf_find_utils.js");

// Supplies `scrollIntoView`.
var _ui_utils = require("./ui_utils.js");
|
|
|
|
/**
 * Search states passed to `_updateUIState`, which forwards them in the
 * "updatefindcontrolstate" event.
 *
 * - FOUND: a match is selected (also reported for an empty query and when
 *   the find bar closes).
 * - NOT_FOUND: every page was searched without finding a match.
 * - WRAPPED: a match was found after the search wrapped around the document.
 * - PENDING: a command was received and its result is not yet known.
 */
const FindState = {
  FOUND: 0,
  NOT_FOUND: 1,
  WRAPPED: 2,
  PENDING: 3
};
|
|
exports.FindState = FindState;
|
|
// Delay, in milliseconds, passed to `setTimeout` before a "find" command
// actually triggers the search.
const FIND_TIMEOUT = 250;
// `top` value of the spot handed to `scrollIntoView` for the selected match.
const MATCH_SCROLL_OFFSET_TOP = -50;
// `left` value of the spot handed to `scrollIntoView` for the selected match.
const MATCH_SCROLL_OFFSET_LEFT = -400;
|
|
// Maps typographic characters to the plain-ASCII text they are replaced
// with before searching. Note that the fraction entries expand one
// character into three.
const CHARACTERS_TO_NORMALIZE = {
  "\u2018": "'", // Left single quotation mark
  "\u2019": "'", // Right single quotation mark
  "\u201A": "'", // Single low-9 quotation mark
  "\u201B": "'", // Single high-reversed-9 quotation mark
  "\u201C": '"', // Left double quotation mark
  "\u201D": '"', // Right double quotation mark
  "\u201E": '"', // Double low-9 quotation mark
  "\u201F": '"', // Double high-reversed-9 quotation mark
  "\u00BC": "1/4", // Vulgar fraction one quarter
  "\u00BD": "1/2", // Vulgar fraction one half
  "\u00BE": "3/4" // Vulgar fraction three quarters
};

// Character-class RegExp matching every key of `CHARACTERS_TO_NORMALIZE`;
// built lazily on the first call to `normalize` and then reused.
let normalizationRegex = null;

/**
 * Replaces every character listed in `CHARACTERS_TO_NORMALIZE` with its
 * ASCII counterpart.
 *
 * @param {string} text - The text to normalize.
 * @returns {string} The text with all listed characters replaced; any other
 *   character is left untouched.
 */
function normalize(text) {
  if (!normalizationRegex) {
    // None of the keys is special inside a character class, so they can be
    // concatenated without escaping.
    const replace = Object.keys(CHARACTERS_TO_NORMALIZE).join("");
    normalizationRegex = new RegExp(`[${replace}]`, "g");
  }

  return text.replace(normalizationRegex, function (ch) {
    return CHARACTERS_TO_NORMALIZE[ch];
  });
}
|
|
|
|
/**
 * Searches the text content of a PDF document for a query and keeps track of
 * the matches found on every page, the currently selected match, and the
 * state that is reported to the UI through the event bus.
 */
class PDFFindController {
  /**
   * @param {Object} options
   * @param {Object} options.linkService - Read for `page`, `pagesCount` and
   *   `isPageVisible()`; `page` is assigned when the selected match must be
   *   scrolled to.
   * @param {Object} options.eventBus - Used to listen for "findbarclose" and
   *   to dispatch the "updatetextlayermatches", "updatefindmatchescount" and
   *   "updatefindcontrolstate" events.
   */
  constructor({ linkService, eventBus }) {
    this._linkService = linkService;
    this._eventBus = eventBus;

    this._reset();

    eventBus._on("findbarclose", this._onFindBarClose.bind(this));
  }

  /** @type {boolean} Whether matches should currently be highlighted. */
  get highlightMatches() {
    return this._highlightMatches;
  }

  /** @type {Array} Per-page arrays of match start offsets. */
  get pageMatches() {
    return this._pageMatches;
  }

  /**
   * @type {Array} Per-page arrays of match lengths; only filled in by the
   *   word (non-phrase) search.
   */
  get pageMatchesLength() {
    return this._pageMatchesLength;
  }

  /** @type {Object} `{ pageIdx, matchIdx }` of the selected match (-1: none). */
  get selected() {
    return this._selected;
  }

  /** @type {Object|null} State object of the most recent command. */
  get state() {
    return this._state;
  }

  /**
   * Sets (or clears) the document to search in. Any state belonging to a
   * previously set document is discarded first.
   *
   * @param {Object} pdfDocument - The document; a falsy value only resets.
   */
  setDocument(pdfDocument) {
    if (this._pdfDocument) {
      this._reset();
    }

    if (!pdfDocument) {
      return;
    }

    this._pdfDocument = pdfDocument;

    // Unblocks commands that were queued before a document was available.
    this._firstPageCapability.resolve();
  }

  /**
   * Handles a find command.
   *
   * @param {string} cmd - The command name; "find", "findagain" and
   *   "findhighlightallchange" are handled specially, anything else simply
   *   triggers a search.
   * @param {Object} state - The search state (`query`, `caseSensitive`,
   *   `entireWord`, `phraseSearch`, `highlightAll`, `findPrevious`). The call
   *   is ignored when this is falsy.
   */
  executeCommand(cmd, state) {
    if (!state) {
      return;
    }

    const pdfDocument = this._pdfDocument;

    if (this._state === null || this._shouldDirtyMatch(cmd, state)) {
      this._dirtyMatch = true;
    }

    this._state = state;

    if (cmd !== "findhighlightallchange") {
      this._updateUIState(FindState.PENDING);
    }

    this._firstPageCapability.promise.then(() => {
      // Bail out if the document was closed, or replaced, in the meantime.
      if (
        !this._pdfDocument ||
        (pdfDocument && this._pdfDocument !== pdfDocument)
      ) {
        return;
      }

      this._extractText();

      const findbarClosed = !this._highlightMatches;
      const pendingTimeout = !!this._findTimeout;

      if (this._findTimeout) {
        clearTimeout(this._findTimeout);
        this._findTimeout = null;
      }

      if (cmd === "find") {
        // Delay the search, so that rapidly repeated "find" commands only
        // run the (last) one that is still pending after the timeout.
        this._findTimeout = setTimeout(() => {
          this._nextMatch();

          this._findTimeout = null;
        }, FIND_TIMEOUT);
      } else if (this._dirtyMatch) {
        // The matches are stale; recompute them immediately.
        this._nextMatch();
      } else if (cmd === "findagain") {
        this._nextMatch();

        // Highlighting was off until `_nextMatch` re-enabled it; redraw all
        // pages when the state asks for every match to be highlighted.
        if (findbarClosed && this._state.highlightAll) {
          this._updateAllPages();
        }
      } else if (cmd === "findhighlightallchange") {
        // A delayed "find" was cancelled above, so run it now; otherwise
        // just make sure that highlighting is enabled.
        if (pendingTimeout) {
          this._nextMatch();
        } else {
          this._highlightMatches = true;
        }

        this._updateAllPages();
      } else {
        this._nextMatch();
      }
    });
  }

  /**
   * Scrolls the given element into view, but only when a scroll is pending
   * and the element belongs to the currently selected match.
   *
   * @param {Object} options
   * @param {Object|null} [options.element] - The element to scroll to.
   * @param {number} [options.pageIndex] - Page index of the match.
   * @param {number} [options.matchIndex] - Index of the match on that page.
   */
  scrollMatchIntoView({ element = null, pageIndex = -1, matchIndex = -1 }) {
    if (!this._scrollMatches || !element) {
      return;
    } else if (matchIndex === -1 || matchIndex !== this._selected.matchIdx) {
      return;
    } else if (pageIndex === -1 || pageIndex !== this._selected.pageIdx) {
      return;
    }

    // Only scroll once per selected match.
    this._scrollMatches = false;

    const spot = {
      top: MATCH_SCROLL_OFFSET_TOP,
      left: MATCH_SCROLL_OFFSET_LEFT
    };
    (0, _ui_utils.scrollIntoView)(element, spot, true);
  }

  /**
   * Restores every piece of search state to its initial value, cancels a
   * pending find timeout, and creates a fresh first-page capability.
   */
  _reset() {
    this._highlightMatches = false;
    this._scrollMatches = false;
    this._pdfDocument = null;
    this._pageMatches = [];
    this._pageMatchesLength = [];
    this._state = null;
    // Currently selected match.
    this._selected = {
      pageIdx: -1,
      matchIdx: -1
    };
    // Where the find algorithm currently is in the document.
    this._offset = {
      pageIdx: null,
      matchIdx: null,
      wrapped: false
    };
    this._extractTextPromises = [];
    this._pageContents = []; // Stores the normalized text for each page.
    this._matchesCountTotal = 0;
    this._pagesToSearch = null;
    this._pendingFindMatches = Object.create(null);
    this._resumePageIdx = null;
    this._dirtyMatch = false;
    clearTimeout(this._findTimeout);
    this._findTimeout = null;
    this._firstPageCapability = (0, _pdf.createPromiseCapability)();
  }

  /**
   * @type {string} The normalized search query. The normalized form is
   *   cached and only recomputed when the raw query in the state changes.
   */
  get _query() {
    if (this._state.query !== this._rawQuery) {
      this._rawQuery = this._state.query;
      this._normalizedQuery = normalize(this._state.query);
    }

    return this._normalizedQuery;
  }

  /**
   * Decides whether a command invalidates the previously computed matches.
   *
   * @param {string} cmd - The command name.
   * @param {Object} state - The state accompanying the command.
   * @returns {boolean} `true` when the matches must be recomputed.
   */
  _shouldDirtyMatch(cmd, state) {
    // A changed query always requires a new search.
    if (state.query !== this._state.query) {
      return true;
    }

    switch (cmd) {
      case "findagain": {
        const pageNumber = this._selected.pageIdx + 1;
        const linkService = this._linkService;

        // Restart the search when the page holding the selected match is
        // neither the current page nor visible; otherwise keep the matches.
        if (
          pageNumber >= 1 &&
          pageNumber <= linkService.pagesCount &&
          pageNumber !== linkService.page &&
          !linkService.isPageVisible(pageNumber)
        ) {
          return true;
        }

        return false;
      }

      case "findhighlightallchange":
        return false;
    }

    return true;
  }

  /**
   * Sorts the raw word matches of a page and copies them, minus the ones
   * that duplicate or are fully covered by another match, into the output
   * arrays.
   *
   * @param {Array<Object>} matchesWithLength - `{ match, matchLength,
   *   skipped }` entries; sorted in place and flagged via `skipped`.
   * @param {Array<number>} matches - Receives the match start offsets.
   * @param {Array<number>} matchesLength - Receives the match lengths.
   */
  _prepareMatches(matchesWithLength, matches, matchesLength) {
    function isSubTerm(currentIndex) {
      const currentElem = matchesWithLength[currentIndex];
      const nextElem = matchesWithLength[currentIndex + 1];

      // Same start offset as the next entry, which (given the sort order)
      // is at least as long: keep only the later one.
      if (
        currentIndex < matchesWithLength.length - 1 &&
        currentElem.match === nextElem.match
      ) {
        currentElem.skipped = true;
        return true;
      }

      // Check whether an earlier, non-skipped entry covers the current one.
      for (let i = currentIndex - 1; i >= 0; i--) {
        const prevElem = matchesWithLength[i];

        if (prevElem.skipped) {
          continue;
        }

        // The previous entry ends before the current one starts; stop here.
        if (prevElem.match + prevElem.matchLength < currentElem.match) {
          break;
        }

        // The previous entry extends to, or past, the end of the current.
        if (
          prevElem.match + prevElem.matchLength >=
          currentElem.match + currentElem.matchLength
        ) {
          currentElem.skipped = true;
          return true;
        }
      }

      return false;
    }

    // Sort by start offset, and by length for equal start offsets.
    matchesWithLength.sort(function (a, b) {
      return a.match === b.match
        ? a.matchLength - b.matchLength
        : a.match - b.match;
    });

    for (let i = 0, len = matchesWithLength.length; i < len; i++) {
      if (isSubTerm(i)) {
        continue;
      }

      matches.push(matchesWithLength[i].match);
      matchesLength.push(matchesWithLength[i].matchLength);
    }
  }

  /**
   * Determines whether a match is delimited on both sides, i.e. whether the
   * characters just outside the match have a different character type (as
   * reported by `getCharacterType`) than the adjacent characters inside it.
   *
   * @param {string} content - The text that was searched.
   * @param {number} startIdx - Start offset of the match.
   * @param {number} length - Length of the match.
   * @returns {boolean} `false` if either neighbour shares the character type
   *   of the match boundary it touches, `true` otherwise.
   */
  _isEntireWord(content, startIdx, length) {
    if (startIdx > 0) {
      const first = content.charCodeAt(startIdx);
      const limit = content.charCodeAt(startIdx - 1);

      if (
        (0, _pdf_find_utils.getCharacterType)(first) ===
        (0, _pdf_find_utils.getCharacterType)(limit)
      ) {
        return false;
      }
    }

    const endIdx = startIdx + length - 1;

    if (endIdx < content.length - 1) {
      const last = content.charCodeAt(endIdx);
      const limit = content.charCodeAt(endIdx + 1);

      if (
        (0, _pdf_find_utils.getCharacterType)(last) ===
        (0, _pdf_find_utils.getCharacterType)(limit)
      ) {
        return false;
      }
    }

    return true;
  }

  /**
   * Finds all non-overlapping occurrences of the whole query on a page and
   * stores their start offsets in `_pageMatches[pageIndex]`.
   *
   * @param {string} query - The non-empty query.
   * @param {number} pageIndex - Index of the page being searched.
   * @param {string} pageContent - Text of that page.
   * @param {boolean} entireWord - Only accept entire-word matches.
   */
  _calculatePhraseMatch(query, pageIndex, pageContent, entireWord) {
    const matches = [];
    const queryLen = query.length;
    // Start so that the first `indexOf` call searches from offset zero.
    let matchIdx = -queryLen;

    while (true) {
      matchIdx = pageContent.indexOf(query, matchIdx + queryLen);

      if (matchIdx === -1) {
        break;
      }

      if (entireWord && !this._isEntireWord(pageContent, matchIdx, queryLen)) {
        continue;
      }

      matches.push(matchIdx);
    }

    this._pageMatches[pageIndex] = matches;
  }

  /**
   * Finds the occurrences of every whitespace-separated word of the query on
   * a page and stores the de-duplicated result in `_pageMatches[pageIndex]`
   * and `_pageMatchesLength[pageIndex]`.
   *
   * @param {string} query - The query.
   * @param {number} pageIndex - Index of the page being searched.
   * @param {string} pageContent - Text of that page.
   * @param {boolean} entireWord - Only accept entire-word matches.
   */
  _calculateWordMatch(query, pageIndex, pageContent, entireWord) {
    const matchesWithLength = [];
    // `String.prototype.match` returns `null` when nothing matches, which is
    // the case for a whitespace-only query; treat that as "no words".
    const queryArray = query.match(/\S+/g) || [];

    for (let i = 0, len = queryArray.length; i < len; i++) {
      const subquery = queryArray[i];
      const subqueryLen = subquery.length;
      let matchIdx = -subqueryLen;

      while (true) {
        matchIdx = pageContent.indexOf(subquery, matchIdx + subqueryLen);

        if (matchIdx === -1) {
          break;
        }

        if (
          entireWord &&
          !this._isEntireWord(pageContent, matchIdx, subqueryLen)
        ) {
          continue;
        }

        // Other word matches may overlap or be doubled; keep the length so
        // that `_prepareMatches` can sort them out.
        matchesWithLength.push({
          match: matchIdx,
          matchLength: subqueryLen,
          skipped: false
        });
      }
    }

    this._pageMatchesLength[pageIndex] = [];
    this._pageMatches[pageIndex] = [];

    this._prepareMatches(
      matchesWithLength,
      this._pageMatches[pageIndex],
      this._pageMatchesLength[pageIndex]
    );
  }

  /**
   * Computes the matches for one page, then updates highlighting, resumes a
   * search that was waiting on this page, and updates the match count.
   *
   * @param {number} pageIndex - Index of the page whose text is available.
   */
  _calculateMatch(pageIndex) {
    let pageContent = this._pageContents[pageIndex];
    let query = this._query;
    const { caseSensitive, entireWord, phraseSearch } = this._state;

    if (query.length === 0) {
      // Do nothing: the matches should be wiped out already.
      return;
    }

    if (!caseSensitive) {
      pageContent = pageContent.toLowerCase();
      query = query.toLowerCase();
    }

    if (phraseSearch) {
      this._calculatePhraseMatch(query, pageIndex, pageContent, entireWord);
    } else {
      this._calculateWordMatch(query, pageIndex, pageContent, entireWord);
    }

    // When `highlightAll` is set, update the page as soon as its matches are
    // known.
    if (this._state.highlightAll) {
      this._updatePage(pageIndex);
    }

    // The search was paused waiting for this page; continue it.
    if (this._resumePageIdx === pageIndex) {
      this._resumePageIdx = null;

      this._nextPageMatch();
    }

    // Update the match count.
    const pageMatchesCount = this._pageMatches[pageIndex].length;

    if (pageMatchesCount > 0) {
      this._matchesCountTotal += pageMatchesCount;

      this._updateUIResultsCount();
    }
  }

  /**
   * Starts extracting the text of every page, one page after the other, and
   * stores one promise per page in `_extractTextPromises`. Each promise
   * resolves with the page index once `_pageContents` has an entry for that
   * page (an empty string if the text could not be fetched). Does nothing
   * when extraction has already been started.
   */
  _extractText() {
    // Perform text extraction once if this method is called multiple times.
    if (this._extractTextPromises.length > 0) {
      return;
    }

    let promise = Promise.resolve();

    for (let i = 0, ii = this._linkService.pagesCount; i < ii; i++) {
      const extractTextCapability = (0, _pdf.createPromiseCapability)();
      this._extractTextPromises[i] = extractTextCapability.promise;

      // Chain the pages, so that only one page is processed at a time.
      promise = promise.then(() => {
        return this._pdfDocument
          .getPage(i + 1)
          .then(pdfPage => {
            return pdfPage.getTextContent({
              normalizeWhitespace: true
            });
          })
          .then(
            textContent => {
              const textItems = textContent.items;
              const strBuf = [];

              for (let j = 0, jj = textItems.length; j < jj; j++) {
                strBuf.push(textItems[j].str);
              }

              // Store the normalized page content (text items) as one string.
              this._pageContents[i] = normalize(strBuf.join(""));
              extractTextCapability.resolve(i);
            },
            reason => {
              console.error(
                `Unable to get text content for page ${i + 1}`,
                reason
              );
              // Page error -- assuming no text content.
              this._pageContents[i] = "";
              extractTextCapability.resolve(i);
            }
          );
      });
    }
  }

  /**
   * Requests that the matches of one page be redrawn. If a scroll is pending
   * and the page holds the selected match, the page is made current first.
   *
   * @param {number} index - Index of the page to update.
   */
  _updatePage(index) {
    if (this._scrollMatches && this._selected.pageIdx === index) {
      // Setting `page` is expected to bring the page into view, so that the
      // match can subsequently be scrolled to.
      this._linkService.page = index + 1;
    }

    this._eventBus.dispatch("updatetextlayermatches", {
      source: this,
      pageIndex: index
    });
  }

  /**
   * Requests that the matches of all pages be redrawn (`pageIndex: -1`).
   */
  _updateAllPages() {
    this._eventBus.dispatch("updatetextlayermatches", {
      source: this,
      pageIndex: -1
    });
  }

  /**
   * Moves the selection to the next (or previous) match. When the matches
   * are dirty, all search state is reset and the matches of every page are
   * recomputed first.
   */
  _nextMatch() {
    const previous = this._state.findPrevious;
    const currentPageIndex = this._linkService.page - 1;
    const numPages = this._linkService.pagesCount;
    this._highlightMatches = true;

    if (this._dirtyMatch) {
      // Need to recalculate the matches, reset everything.
      this._dirtyMatch = false;
      this._selected.pageIdx = this._selected.matchIdx = -1;
      this._offset.pageIdx = currentPageIndex;
      this._offset.matchIdx = null;
      this._offset.wrapped = false;
      this._resumePageIdx = null;
      this._pageMatches.length = 0;
      this._pageMatchesLength.length = 0;
      this._matchesCountTotal = 0;

      this._updateAllPages(); // Wipe out any previously highlighted matches.

      for (let i = 0; i < numPages; i++) {
        // Skip pages that already have a calculation scheduled.
        if (this._pendingFindMatches[i] === true) {
          continue;
        }

        this._pendingFindMatches[i] = true;

        this._extractTextPromises[i].then(pageIdx => {
          delete this._pendingFindMatches[pageIdx];

          this._calculateMatch(pageIdx);
        });
      }
    }

    // If there's no query there's no point in searching.
    if (this._query === "") {
      this._updateUIState(FindState.FOUND);

      return;
    }

    // If we're waiting on a page, there's nothing else to do for now. Compare
    // against `null` explicitly, since page index 0 is a valid pending page.
    if (this._resumePageIdx !== null) {
      return;
    }

    const offset = this._offset;
    // Keep track of how many pages we should maximally iterate through.
    this._pagesToSearch = numPages;

    // If there's already a `matchIdx`, we are iterating through a page's
    // matches.
    if (offset.matchIdx !== null) {
      const numPageMatches = this._pageMatches[offset.pageIdx].length;

      if (
        (!previous && offset.matchIdx + 1 < numPageMatches) ||
        (previous && offset.matchIdx > 0)
      ) {
        // The simple case: advance the match index within the same page.
        offset.matchIdx = previous ? offset.matchIdx - 1 : offset.matchIdx + 1;

        this._updateMatch(true);

        return;
      }

      // We went beyond the current page's matches, so advance to the next
      // page.
      this._advanceOffsetPage(previous);
    }

    // Start searching through the page.
    this._nextPageMatch();
  }

  /**
   * Processes the (already computed) matches of the page at the current
   * offset.
   *
   * @param {Array<number>} matches - The matches of that page.
   * @returns {boolean} `true` when the search is finished (a match was
   *   selected, or every page was searched without success), `false` when
   *   the offset was advanced and the next page should be examined.
   */
  _matchesReady(matches) {
    const offset = this._offset;
    const numMatches = matches.length;
    const previous = this._state.findPrevious;

    if (numMatches) {
      // There were matches for the page, so select the first or last one.
      offset.matchIdx = previous ? numMatches - 1 : 0;

      this._updateMatch(true);

      return true;
    }

    // No matches, so attempt to search the next page.
    this._advanceOffsetPage(previous);

    if (offset.wrapped) {
      offset.matchIdx = null;

      if (this._pagesToSearch < 0) {
        // No point in wrapping again, there were no matches.
        this._updateMatch(false);

        // While matches were not found, searching for a page with matches
        // should nevertheless halt.
        return true;
      }
    }

    // Matches were not found (and searching is not done).
    return false;
  }

  /**
   * Walks the pages, starting at the current offset, until a page with
   * matches is found, the search is exhausted, or a page is reached whose
   * matches have not been computed yet. In the last case the page is
   * recorded in `_resumePageIdx`, and `_calculateMatch` resumes the search.
   */
  _nextPageMatch() {
    if (this._resumePageIdx !== null) {
      console.error("There can only be one pending page.");
    }

    let matches = null;

    do {
      const pageIdx = this._offset.pageIdx;
      matches = this._pageMatches[pageIdx];

      if (!matches) {
        // The matches don't exist yet for processing by `_matchesReady`, so
        // set a resume point for when they do exist.
        this._resumePageIdx = pageIdx;
        break;
      }
    } while (!this._matchesReady(matches));
  }

  /**
   * Moves the offset to the adjacent page, wrapping around at either end of
   * the document, and decrements the number of pages left to search.
   *
   * @param {boolean} previous - Move backwards instead of forwards.
   */
  _advanceOffsetPage(previous) {
    const offset = this._offset;
    const numPages = this._linkService.pagesCount;
    offset.pageIdx = previous ? offset.pageIdx - 1 : offset.pageIdx + 1;
    offset.matchIdx = null;
    this._pagesToSearch--;

    if (offset.pageIdx >= numPages || offset.pageIdx < 0) {
      offset.pageIdx = previous ? numPages - 1 : 0;
      offset.wrapped = true;
    }
  }

  /**
   * Finishes a search step: updates the selected match when one was found,
   * reports the resulting state to the UI, and redraws the affected pages.
   *
   * @param {boolean} [found] - Whether a match was found at the offset.
   */
  _updateMatch(found = false) {
    let state = FindState.NOT_FOUND;
    const wrapped = this._offset.wrapped;
    this._offset.wrapped = false;

    if (found) {
      const previousPage = this._selected.pageIdx;
      this._selected.pageIdx = this._offset.pageIdx;
      this._selected.matchIdx = this._offset.matchIdx;
      state = wrapped ? FindState.WRAPPED : FindState.FOUND;

      // Update the page that previously held the selection, so that its
      // selected-match highlighting is removed.
      if (previousPage !== -1 && previousPage !== this._selected.pageIdx) {
        this._updatePage(previousPage);
      }
    }

    this._updateUIState(state, this._state.findPrevious);

    if (this._selected.pageIdx !== -1) {
      // Ensure that the match will be scrolled into view.
      this._scrollMatches = true;

      this._updatePage(this._selected.pageIdx);
    }
  }

  /**
   * Handles the "findbarclose" event: cancels pending work, reports
   * `FindState.FOUND`, disables highlighting and redraws all pages.
   *
   * @param {Object} evt - The event data (unused).
   */
  _onFindBarClose(evt) {
    const pdfDocument = this._pdfDocument;

    // Since searching is asynchronous, ensure that the removal of highlighted
    // matches (from the UI) is async too, so that matches can't remain
    // visible after the find bar has been closed.
    this._firstPageCapability.promise.then(() => {
      // Bail out if the document was closed, or replaced, in the meantime.
      if (
        !this._pdfDocument ||
        (pdfDocument && this._pdfDocument !== pdfDocument)
      ) {
        return;
      }

      // Ensure that a pending, not yet started, search operation is aborted.
      if (this._findTimeout) {
        clearTimeout(this._findTimeout);
        this._findTimeout = null;
      }

      // Abort any long-running search: by no longer waiting for the pending
      // page and marking the matches as dirty, the next command starts over.
      // Compare against `null` explicitly, since page index 0 is a valid
      // pending page.
      if (this._resumePageIdx !== null) {
        this._resumePageIdx = null;
        this._dirtyMatch = true;
      }

      // Avoid the UI being stuck in a pending state when the find bar is
      // re-opened.
      this._updateUIState(FindState.FOUND);

      this._highlightMatches = false;

      this._updateAllPages(); // Wipe out any previously highlighted matches.
    });
  }

  /**
   * Computes the one-based position of the selected match among all matches.
   *
   * @returns {Object} `{ current, total }`; both are zero when no match is
   *   selected or when the computed position is out of range.
   */
  _requestMatchesCount() {
    const { pageIdx, matchIdx } = this._selected;
    let current = 0;
    let total = this._matchesCountTotal;

    if (matchIdx !== -1) {
      // Count the matches on all pages preceding the selected one; pages
      // without computed matches contribute nothing.
      for (let i = 0; i < pageIdx; i++) {
        current += (this._pageMatches[i] && this._pageMatches[i].length) || 0;
      }

      current += matchIdx + 1;
    }

    // While the matches of preceding pages are still being computed, the
    // position may be temporarily inconsistent; report "no matches" then.
    if (current < 1 || current > total) {
      current = total = 0;
    }

    return {
      current,
      total
    };
  }

  /**
   * Dispatches "updatefindmatchescount" with the current match count.
   */
  _updateUIResultsCount() {
    this._eventBus.dispatch("updatefindmatchescount", {
      source: this,
      matchesCount: this._requestMatchesCount()
    });
  }

  /**
   * Dispatches "updatefindcontrolstate" with the given state, the current
   * match count and the raw query (`null` when no command was run yet).
   *
   * @param {number} state - One of the `FindState` values.
   * @param {boolean} [previous] - Whether the search ran backwards.
   */
  _updateUIState(state, previous) {
    this._eventBus.dispatch("updatefindcontrolstate", {
      source: this,
      state,
      previous,
      matchesCount: this._requestMatchesCount(),
      rawQuery: this._state ? this._state.query : null
    });
  }
}
|
|
|
|
exports.PDFFindController = PDFFindController;
|