diff --git a/THIRD-PARTY-NOTICES.txt b/THIRD-PARTY-NOTICES.txt index 357754f5..46fe340b 100644 --- a/THIRD-PARTY-NOTICES.txt +++ b/THIRD-PARTY-NOTICES.txt @@ -303,3 +303,21 @@ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +------------------------------------------- + +@mozilla/readability + +Copyright (c) 2010 Arc90 Inc + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. diff --git a/docs/client-side-migration.md b/docs/client-side-migration.md new file mode 100644 index 00000000..daa08e2e --- /dev/null +++ b/docs/client-side-migration.md @@ -0,0 +1,254 @@ +# WebClipper Client-Side Migration + +## Overview + +This document tracks the experiment to remove server-side dependencies from the OneNote Web Clipper's content processing pipeline and replace them with client-side alternatives. The goal is a fully self-contained browser extension that does not rely on the OneNote augmentation/screenshot server APIs. + +## Server APIs Removed + +### 1. Augmentation API +- **Endpoint:** `onenote.com/onaugmentation/clipperextract/v1.0/` +- **Purpose:** Server-side article/recipe/product extraction using ML models +- **Replacement:** Mozilla Readability (`@mozilla/readability`, Apache 2.0 license) +- **Status:** Complete + +### 2. 
Full Page Screenshot API (DomEnhancer) +- **Endpoint:** `onenote.com/onaugmentation/clipperDomEnhancer/v1.0/` +- **Purpose:** Server-side Puppeteer rendering of page DOM into full-page screenshots +- **Replacement:** Client-side renderer window with scroll-capture and canvas stitching +- **Status:** Functional, with known issues (see below) + +--- + +## Change 1: Article Extraction with Readability.js + +### What Changed +- `augmentationHelper.ts` — Rewrote `augmentPage()` to use `new Readability(doc).parse()` locally instead of POSTing to the server API +- Removed `makeAugmentationRequest()` method entirely +- Removed imports: `HttpWithRetries`, `OneNoteApiUtils`, `Settings`, `Constants` (URL refs) +- Added metadata mapping: Readability's `title`, `excerpt`, `byline`, `siteName`, `publishedTime` are stored in `PageMetadata` + +### Why Readability.js +- Apache 2.0 license (compatible with WebClipper's MIT license; repo already has Apache 2.0 deps like pdfjs-dist) +- Well-maintained by Mozilla, used in Firefox Reader View +- Produces clean article HTML similar to what the server API returned + +### Other Related Changes +- `clipper.tsx` — Removed `UrlUtils.onWhitelistedDomain()` check that gated augmentation mode; FullPage is now the default clip mode +- `constants.ts` — Removed `augmentationApiUrl` constant +- `readability.d.ts` (new) — TypeScript type declarations for `@mozilla/readability` +- `package.json` — Added `@mozilla/readability` dependency +- `augmentationHelper_tests.ts` — Updated tests for new local implementation + +--- + +## Change 2: Full Page Screenshot with Renderer Window + +### Architecture + +The server-side approach used Puppeteer to render sanitized HTML and produce a full-page screenshot. The client-side replacement mirrors this: + +1. **Store HTML in `chrome.storage.session`** — The page's HTML content, base URL, and localized status text are written to session storage (avoids JSON serialization bottleneck with large payloads) +2. 
**Open a renderer popup window** — An extension page (`renderer.html`) is opened at the same position/size as the user's browser with `focused: true`. Width is capped at 1280px. Zoom is forced to 100% via `chrome.tabs.setZoom`. Title bar shows localized "Clipping Page" status text +3. **Port-based communication** — The renderer page connects to the service worker via `chrome.runtime.connect({ name: "renderer" })`. Commands (loadContent, scroll) are exchanged over this port +4. **Renderer loads content** — Reads HTML from `chrome.storage.session`, strips ` + + diff --git a/src/scripts/clipperUI/clipper.tsx b/src/scripts/clipperUI/clipper.tsx index 57511271..2d50b9dc 100644 --- a/src/scripts/clipperUI/clipper.tsx +++ b/src/scripts/clipperUI/clipper.tsx @@ -8,7 +8,6 @@ import {PageInfo} from "../pageInfo"; import {Polyfills} from "../polyfills"; import {PreviewGlobalInfo} from "../previewInfo"; import {TooltipType} from "./tooltipType"; -import {UrlUtils} from "../urlUtils"; import {Communicator} from "../communicator/communicator"; import {IFrameMessageHandler} from "../communicator/iframeMessageHandler"; @@ -145,6 +144,13 @@ class ClipperClass extends ComponentBase { this.state.setState({ uiExpanded: !this.state.uiExpanded }); }); + // Called by worker after sign-out from renderer — reset to sign-in state and show the sidebar + Clipper.getExtensionCommunicator().registerFunction(Constants.FunctionKeys.showSignInPanel, () => { + this.state.setState(this.getSignOutState()); + this.state.setState({ uiExpanded: true }); + Clipper.getInjectCommunicator().callRemoteFunction(Constants.FunctionKeys.showUi); + }); + Clipper.getInjectCommunicator().registerFunction(Constants.FunctionKeys.onSpaNavigate, () => { // This could have been called when the UI is already toggled off if (this.state.uiExpanded) { @@ -173,10 +179,21 @@ class ClipperClass extends ComponentBase { }); this.capturePdfScreenshotContent(); - this.captureFullPageScreenshotContent(); 
this.captureAugmentedContent(); this.captureBookmarkContent(); + // If user is signed in, start capture and hide the injected sidebar — unified renderer window takes over + // If NOT signed in, skip capture so the renderer window doesn't open — show sign-in panel only + try { + if (localStorage.getItem("isUserLoggedIn") === "true") { + this.captureFullPageScreenshotContent(); + // Collapse the UI state so that re-invocation (e.g., after sign-out) correctly + // toggles it back to expanded, rather than toggling from expanded to collapsed + this.state.setState({ uiExpanded: false }); + Clipper.getInjectCommunicator().callRemoteFunction(Constants.FunctionKeys.hideUi); + } + } catch (e) { /* ignore */ } + Clipper.logger.setContextProperty(Log.Context.Custom.ContentType, OneNoteApi.ContentType[updatedPageInfo.contentType]); } }); @@ -232,6 +249,9 @@ class ClipperClass extends ComponentBase { } private captureFullPageScreenshotContent() { + if (this.state.fullPageResult && this.state.fullPageResult.status === Status.InProgress) { + return; + } if (this.state.pageInfo.contentType === OneNoteApi.ContentType.EnhancedUrl) { this.state.setState({ fullPageResult: { @@ -244,7 +264,7 @@ class ClipperClass extends ComponentBase { } else { this.state.setState({ fullPageResult: { status: Status.InProgress } }); - FullPageScreenshotHelper.getFullPageScreenshot(this.state.pageInfo.contentData).then((result) => { + FullPageScreenshotHelper.getFullPageScreenshot(this.state.pageInfo.contentData, this.state.pageInfo.rawUrl, this.state.pageInfo.stylesheetCache, this.state.pageInfo ? 
this.state.pageInfo.contentTitle : "").then((result) => { this.state.setState({ fullPageResult: { data: result, status: Status.Succeeded } }); }, () => { this.state.setState({ @@ -409,6 +429,14 @@ class ClipperClass extends ComponentBase { this.state.setState({ userResult: { status: Status.Succeeded, data: updatedUser } }); Clipper.logger.setContextProperty(Log.Context.Custom.AuthType, updatedUser.user.authType); Clipper.logger.setContextProperty(Log.Context.Custom.UserInfoId, updatedUser.user.cid); + + // After sign-in completes, hide the injected sidebar and launch the unified renderer window + if (updatedUser.updateReason === UpdateReason.SignInAttempt) { + try { + Clipper.getInjectCommunicator().callRemoteFunction(Constants.FunctionKeys.hideUi); + this.captureFullPageScreenshotContent(); + } catch (e) { /* ignore */ } + } } else { this.state.setState({ userResult: { status: Status.Failed, data: updatedUser } }); } @@ -532,8 +560,20 @@ class ClipperClass extends ComponentBase { private initializeSmartValues() { this.state.currentMode.subscribe((newMode: ClipMode) => { + if (newMode !== ClipMode.FullPage && this.state.fullPageResult && this.state.fullPageResult.status === Status.InProgress) { + // Cancel in-progress screenshot when switching away from FullPage + Clipper.getExtensionCommunicator().callRemoteFunction(Constants.FunctionKeys.cancelFullPageScreenshot); + this.state.setState({ fullPageResult: { status: Status.Failed } }); + } + switch (newMode) { case ClipMode.FullPage: + Clipper.getInjectCommunicator().callRemoteFunction(Constants.FunctionKeys.updatePageInfoIfUrlChanged); + // Retry screenshot if previous attempt failed + if (!this.state.fullPageResult || this.state.fullPageResult.status === Status.Failed || this.state.fullPageResult.status === Status.NotStarted) { + this.captureFullPageScreenshotContent(); + } + break; case ClipMode.Augmentation: Clipper.getInjectCommunicator().callRemoteFunction(Constants.FunctionKeys.updatePageInfoIfUrlChanged); 
break; @@ -571,9 +611,6 @@ class ClipperClass extends ComponentBase { return ClipMode.Pdf; } - if (UrlUtils.onWhitelistedDomain(this.state.pageInfo.rawUrl)) { - return ClipMode.Augmentation; - } } return ClipMode.FullPage; diff --git a/src/scripts/clipperUI/components/previewViewer/fullPagePreview.tsx b/src/scripts/clipperUI/components/previewViewer/fullPagePreview.tsx index cf56ea8e..2fd5365b 100644 --- a/src/scripts/clipperUI/components/previewViewer/fullPagePreview.tsx +++ b/src/scripts/clipperUI/components/previewViewer/fullPagePreview.tsx @@ -1,9 +1,6 @@ import {Constants} from "../../../constants"; -import {SmartValue} from "../../../communicator/smartValue"; - import {FullPageScreenshotResult} from "../../../contentCapture/fullPageScreenshotHelper"; -import {PdfScreenshotResult} from "../../../contentCapture/pdfScreenshotHelper"; import {ExtensionUtils} from "../../../extensions/extensionUtils"; @@ -18,6 +15,8 @@ import {PreviewComponentBase} from "./previewComponentBase"; import {PreviewViewerFullPageHeader} from "./previewViewerFullPageHeader"; class FullPagePreview extends PreviewComponentBase<{}, ClipperStateProp> { + private currentObjectUrl: string = ""; + protected getContentBodyForCurrentStatus(): any[] { let state = this.props.clipperState; @@ -59,7 +58,8 @@ class FullPagePreview extends PreviewComponentBase<{}, ClipperStateProp> { return Localization.getLocalizedString("WebClipper.Preview.LoadingMessage"); default: case Status.Failed: - failureMessage = this.props.clipperState.fullPageResult.data.failureMessage; + let resultData = this.props.clipperState.fullPageResult.data; + failureMessage = resultData ? resultData.failureMessage : undefined; return !!failureMessage ? 
failureMessage : noContentFoundString; } } @@ -74,9 +74,12 @@ class FullPagePreview extends PreviewComponentBase<{}, ClipperStateProp> { if (this.props.clipperState.fullPageResult.data) { let screenshotImages: FullPageScreenshotResult = this.props.clipperState.fullPageResult.data; - for (let imageData of screenshotImages.Images) { - let dataUrl = "data:image/" + screenshotImages.ImageFormat + ";" + screenshotImages.ImageEncoding + "," + imageData; - contentBody.push({altTag}); + if (screenshotImages.ImageBlob) { + if (this.currentObjectUrl) { + URL.revokeObjectURL(this.currentObjectUrl); + } + this.currentObjectUrl = URL.createObjectURL(screenshotImages.ImageBlob); + contentBody.push({altTag}); } } break; diff --git a/src/scripts/communicator/offscreenCommunicator.ts b/src/scripts/communicator/offscreenCommunicator.ts index c5769afa..62d61ba7 100644 --- a/src/scripts/communicator/offscreenCommunicator.ts +++ b/src/scripts/communicator/offscreenCommunicator.ts @@ -1,8 +1,11 @@ -import {WebExtension} from "../extensions/webExtensionBase/webExtension"; import {OffscreenMessageTypes} from "./offscreenMessageTypes"; let creating: Promise; // A global promise to avoid concurrency issues +// Use chrome API directly — WebExtension.browser is only initialized in the +// service worker context, but this module is also imported by the clipper UI. +let offscreenUrl = chrome.runtime.getURL("offscreen.html"); + // This function performs basic filtering and error checking on messages before // dispatching the message to a more specific message handler. 
async function handleResponse(message): Promise { @@ -27,17 +30,21 @@ async function handleResponse(message): Promise { } export async function sendToOffscreenDocument(type: string, data: any): Promise { - const existingContexts = await WebExtension.browser.runtime.getContexts({ - contextTypes: [WebExtension.browser.runtime.ContextType.OFFSCREEN_DOCUMENT], - documentUrls: [WebExtension.offscreenUrl] + // Access newer Chrome APIs via runtime references to avoid hardcoded strings + let chromeRuntime = chrome.runtime as any; + let chromeOffscreen = (chrome as any).offscreen; + + const existingContexts = await chromeRuntime.getContexts({ + contextTypes: [chromeRuntime.ContextType.OFFSCREEN_DOCUMENT], + documentUrls: [offscreenUrl] }); if (creating) { await creating; } else if (existingContexts.length === 0) { - creating = WebExtension.browser.offscreen.createDocument({ - url: WebExtension.offscreenUrl, - reasons: [WebExtension.browser.offscreen.Reason.DOM_PARSER], + creating = chromeOffscreen.createDocument({ + url: offscreenUrl, + reasons: [chromeOffscreen.Reason.DOM_PARSER], justification: "Parse DOM", }); await creating; @@ -45,18 +52,12 @@ export async function sendToOffscreenDocument(type: string, data: any): Promise< } return new Promise(resolve => { - WebExtension.browser.runtime.sendMessage(JSON.stringify({ + chrome.runtime.sendMessage(JSON.stringify({ type: type, target: "offscreen", data: data }), (message) => { handleResponse(message).then((result) => { - /** - * Commenting out the following line in order to always keep 1 offscreen document open - * so as to avoid concurrency issues with multiple offscreen documents. - * TODO: Investigate if there is a better way to handle concurrency issues. 
- */ - // WebExtension.browser.offscreen.closeDocument(); resolve(result); }); }); diff --git a/src/scripts/constants.ts b/src/scripts/constants.ts index c862082e..331ba0ba 100644 --- a/src/scripts/constants.ts +++ b/src/scripts/constants.ts @@ -314,6 +314,11 @@ export module Constants { export var getMultipleStorageValues = "GET_MULTIPLE_STORAGE_VALUES"; export var getTooltipToRenderInPageNav = "GET_TOOLTIP_TO_RENDER_IN_PAGE_NAV"; export var hideUi = "HIDE_UI"; + export var showUi = "SHOW_UI"; + export var showSignInPanel = "SHOW_SIGN_IN_PANEL"; + export var startRegionCapture = "START_REGION_CAPTURE"; + export var regionCaptureComplete = "REGION_CAPTURE_COMPLETE"; + export var regionCaptureCancelled = "REGION_CAPTURE_CANCELLED"; export var invokeClipper = "INVOKE_CLIPPER"; export var invokeClipperFromPageNav = "INVOKE_CLIPPER_FROM_PAGE_NAV"; export var invokeDebugLogging = "INVOKE_DEBUG_LOGGING"; @@ -330,6 +335,8 @@ export module Constants { export var signOutUser = "SIGN_OUT_USER"; export var tabToLowestIndexedElement = "TAB_TO_LOWEST_INDEXED_ELEMENT"; export var takeTabScreenshot = "TAKE_TAB_SCREENSHOT"; + export var takeFullPageScreenshot = "TAKE_FULL_PAGE_SCREENSHOT"; + export var cancelFullPageScreenshot = "CANCEL_FULL_PAGE_SCREENSHOT"; export var telemetry = "TELEMETRY"; export var toggleClipper = "TOGGLE_CLIPPER"; export var unloadHandler = "UNLOAD_HANDLER"; @@ -382,9 +389,7 @@ export module Constants { export module Urls { export var serviceDomain = "https://www.onenote.com"; - export var augmentationApiUrl = serviceDomain + "/onaugmentation/clipperextract/v1.0/"; export var changelogUrl = serviceDomain + "/whatsnext/webclipper"; - export var fullPageScreenshotUrl = serviceDomain + "/onaugmentation/clipperDomEnhancer/v1.0/"; export var localizedStringsUrlBase = serviceDomain + "/strings?ids=WebClipper."; export var clipperInstallPageUrl = 
"https://support.microsoft.com/en-us/office/getting-started-with-the-onenote-web-clipper-5696609d-c5ae-4591-b3af-1f897cb6eda6"; diff --git a/src/scripts/contentCapture/augmentationHelper.ts b/src/scripts/contentCapture/augmentationHelper.ts index b5c7ee58..52d21fb0 100644 --- a/src/scripts/contentCapture/augmentationHelper.ts +++ b/src/scripts/contentCapture/augmentationHelper.ts @@ -1,20 +1,16 @@ -import {Constants} from "../constants"; -import {Settings} from "../settings"; import {StringUtils} from "../stringUtils"; import {ObjectUtils} from "../objectUtils"; import {Clipper} from "../clipperUI/frontEndGlobals"; import {ClipperState} from "../clipperUI/clipperState"; -import {OneNoteApiUtils} from "../clipperUI/oneNoteApiUtils"; import {DomUtils, EmbeddedVideoIFrameSrcs} from "../domParsers/domUtils"; -import {HttpWithRetries} from "../http/httpWithRetries"; - import * as Log from "../logging/log"; import {CaptureFailureInfo} from "./captureFailureInfo"; -import { ErrorUtils, ResponsePackage } from "../responsePackage"; + +import {Readability} from "@mozilla/readability"; export enum AugmentationModel { None, @@ -38,43 +34,65 @@ export class AugmentationHelper { public static augmentPage(url: string, locale: string, pageContent: string): Promise { return new Promise((resolve, reject) => { let augmentationEvent = new Log.Event.PromiseEvent(Log.Event.Label.AugmentationApiCall); - - let correlationId = StringUtils.generateGuid(); - augmentationEvent.setCustomProperty(Log.PropertyName.Custom.CorrelationId, correlationId); - - AugmentationHelper.makeAugmentationRequest(url, locale, pageContent, correlationId).then((responsePackage: { parsedResponse: AugmentationResult[], response: Response }) => { - let parsedResponse = responsePackage.parsedResponse; - let result: AugmentationResult = { ContentModel: AugmentationModel.None, ContentObjects: [] }; - - augmentationEvent.setCustomProperty(Log.PropertyName.Custom.CorrelationId, 
responsePackage.response.headers.get(Constants.HeaderValues.correlationId)); - - if (parsedResponse && parsedResponse.length > 0 && parsedResponse[0].ContentInHtml) { - result = parsedResponse[0]; + augmentationEvent.setCustomProperty(Log.PropertyName.Custom.CorrelationId, StringUtils.generateGuid()); + + // Single failure path (sync throw or async ONML/video rejection): mark the event failed, log it once, then reject + let fail = (e: any) => { + augmentationEvent.setStatus(Log.Status.Failed); + augmentationEvent.setFailureInfo({ error: (e && e.message) || "Readability parsing failed" }); + Clipper.logger.logEvent(augmentationEvent); + reject(); + }; + + try { + let result: AugmentationResult = { ContentModel: AugmentationModel.None, ContentObjects: [] }; + + // Parse the page HTML into a throwaway Document; Readability mutates whatever it is given, and nothing else reads this one + let doc = (new DOMParser()).parseFromString(pageContent, "text/html"); + let article = new Readability(doc, { charThreshold: 100 }).parse(); + + if (article && article.content) { + result.ContentInHtml = article.content; + result.ContentModel = AugmentationModel.Article; + + let metadata: { [key: string]: string } = {}; + if (article.title) { + metadata.title = article.title; + } + if (article.excerpt) { + metadata.description = article.excerpt; + } + if (article.byline) { + metadata.author = article.byline; + } + if (article.siteName) { + metadata.siteName = article.siteName; + } + if (article.publishedTime) { + metadata.publishedTime = article.publishedTime; + } + result.PageMetadata = metadata; augmentationEvent.setCustomProperty(Log.PropertyName.Custom.AugmentationModel, AugmentationModel[result.ContentModel]); // Remove tags that are unsupported by ONML before we display them in the preview - // Supported tags: https://msdn.microsoft.com/en-us/library/office/dn575442.aspx - let doc = (new DOMParser()).parseFromString(result.ContentInHtml, "text/html"); - let previewElement = AugmentationHelper.getArticlePreviewElement(doc); + let contentDoc = (new DOMParser()).parseFromString(result.ContentInHtml, "text/html"); + let previewElement = AugmentationHelper.getArticlePreviewElement(contentDoc); - DomUtils.toOnml(doc).then(async () => {
+ DomUtils.toOnml(contentDoc).then(async () => { DomUtils.addPreviewContainerStyling(previewElement); await AugmentationHelper.addSupportedVideosToElement(previewElement, pageContent, url); - result.ContentInHtml = doc.body.innerHTML; + result.ContentInHtml = contentDoc.body.innerHTML; + Clipper.logger.logEvent(augmentationEvent); resolve(result); - }); + }).catch(fail); } else { + Clipper.logger.logEvent(augmentationEvent); resolve(result); } - - augmentationEvent.setCustomProperty(Log.PropertyName.Custom.AugmentationModel, AugmentationModel[result.ContentModel]); - }).catch((failure: OneNoteApi.RequestError) => { - OneNoteApiUtils.logOneNoteApiRequestError(augmentationEvent, failure); + } catch (e) { + fail(e); - reject(); - }).then(() => { - Clipper.logger.logEvent(augmentationEvent); - }); + } }); } @@ -86,8 +104,6 @@ export class AugmentationHelper { return augmentationType; } - // TODO: There is a work-item to change the AugmentationApi to return ContentModel as a StringUtils - // instead of an integer let contentModel: AugmentationModel = state.augmentationResult.data.ContentModel; if (AugmentationHelper.isSupportedAugmentationType(contentModel)) { @@ -97,43 +113,6 @@ export class AugmentationHelper { return augmentationType; } - /* - * Returns the augmented preview text.
- */ - public static makeAugmentationRequest(url: string, locale: string, pageContent: string, requestCorrelationId: string): Promise> { - return new Promise>((resolve, reject) => { - Clipper.getUserSessionIdWhenDefined().then((sessionId) => { - let augmentationApiUrl = Constants.Urls.augmentationApiUrl + "?renderMethod=extractAggressive&url=" + url + "&lang=" + locale; - - let headers = {}; - headers[Constants.HeaderValues.appIdKey] = Settings.getSetting("App_Id"); - headers[Constants.HeaderValues.noAuthKey] = "true"; - headers[Constants.HeaderValues.correlationId] = requestCorrelationId; - headers[Constants.HeaderValues.userSessionIdKey] = sessionId; - - HttpWithRetries.post(augmentationApiUrl, pageContent, headers).then((response: Response) => { - response.text().then((responseText: string) => { - let parsedResponse: any; - try { - parsedResponse = JSON.parse(responseText); - } catch (e) { - Clipper.logger.logJsonParseUnexpected(responseText); - ErrorUtils.createRequestErrorObject(response, OneNoteApi.RequestErrorType.UNABLE_TO_PARSE_RESPONSE).then((error) => { - reject(error); - }); - } - - let responsePackage = { - parsedResponse: parsedResponse, - response: response - }; - resolve(responsePackage); - }); - }); - }); - }); - } - public static getArticlePreviewElement(doc: Document): HTMLElement { let mainContainers = doc.getElementsByClassName("MainArticleContainer"); if (ObjectUtils.isNullOrUndefined(mainContainers) || ObjectUtils.isNullOrUndefined(mainContainers[0])) { diff --git a/src/scripts/contentCapture/fullPageScreenshotHelper.ts b/src/scripts/contentCapture/fullPageScreenshotHelper.ts index 505e65f4..14c92788 100644 --- a/src/scripts/contentCapture/fullPageScreenshotHelper.ts +++ b/src/scripts/contentCapture/fullPageScreenshotHelper.ts @@ -1,68 +1,94 @@ import {Clipper} from "../clipperUI/frontEndGlobals"; -import {OneNoteApiUtils} from "../clipperUI/oneNoteApiUtils"; - -import {HttpWithRetries} from "../http/httpWithRetries"; import * as Log from
"../logging/log"; import {Constants} from "../constants"; -import {Settings} from "../settings"; +import {Localization} from "../localization/localization"; import {StringUtils} from "../stringUtils"; import {CaptureFailureInfo} from "./captureFailureInfo"; -import { ErrorUtils } from "../responsePackage"; export interface FullPageScreenshotResult extends CaptureFailureInfo { - ImageEncoding?: string; ImageFormat?: string; - Images?: string[]; + ImageBlob?: Blob; + ImageWidth?: number; } export class FullPageScreenshotHelper { - private static timeout = 50000; - - public static getFullPageScreenshot(pageInfoContentData: string): Promise { + public static getFullPageScreenshot(pageInfoContentData: string, pageUrl?: string, stylesheetCache?: { [url: string]: { cssText: string; media: string } }, pageTitle?: string): Promise { return new Promise((resolve, reject) => { - Clipper.getUserSessionIdWhenDefined().then((sessionId) => { - let fullPageScreenshotEvent = new Log.Event.PromiseEvent(Log.Event.Label.FullPageScreenshotCall); + let fullPageScreenshotEvent = new Log.Event.PromiseEvent(Log.Event.Label.FullPageScreenshotCall); + let correlationId = StringUtils.generateGuid(); + fullPageScreenshotEvent.setCustomProperty(Log.PropertyName.Custom.CorrelationId, correlationId); + + let storageData: any = { + fullPageHtmlContent: pageInfoContentData, + fullPageStatusText: Localization.getLocalizedString("WebClipper.ClipType.ScreenShot.ProgressLabel") || "Capturing page...", + fullPageTitle: pageTitle || "", + fullPageUrl: pageUrl || "" + }; + if (pageUrl) { + storageData.fullPageBaseUrl = pageUrl; + } + if (stylesheetCache) { + storageData.fullPageStylesheets = stylesheetCache; + } - let correlationId = StringUtils.generateGuid(); - fullPageScreenshotEvent.setCustomProperty(Log.PropertyName.Custom.CorrelationId, correlationId); + chrome.storage.session.set(storageData, () => { + if (chrome.runtime.lastError) { + // Bail out if the write failed (e.g. payload over the session-storage quota): the renderer would otherwise load missing or stale content + fullPageScreenshotEvent.setCustomProperty(Log.PropertyName.Custom.FullPageScreenshotContentFound, false); + Clipper.logger.logEvent(fullPageScreenshotEvent); + reject(); + return; + } + Clipper.getExtensionCommunicator().callRemoteFunction( +
Constants.FunctionKeys.takeFullPageScreenshot, { + callback: (signal: any) => { + if (!signal || !signal.success) { + fullPageScreenshotEvent.setCustomProperty(Log.PropertyName.Custom.FullPageScreenshotContentFound, false); + Clipper.logger.logEvent(fullPageScreenshotEvent); + reject(); + return; + } - let headers = {}; - headers[Constants.HeaderValues.accept] = "application/json"; - headers[Constants.HeaderValues.appIdKey] = Settings.getSetting("App_Id"); - headers[Constants.HeaderValues.noAuthKey] = "true"; - headers[Constants.HeaderValues.correlationId] = correlationId; - headers[Constants.HeaderValues.userSessionIdKey] = sessionId; + // Read single final JPEG from session storage (stitched by renderer) + chrome.storage.session.get(["fullPageFinalImage"], (stored: any) => { + // Keep fullPageFinalImage — worker needs it for save flow + // (URL now passed via port message, no longer read from session) + chrome.storage.session.remove([ + "fullPageHtmlContent", "fullPageBaseUrl", "fullPageStatusText", + "fullPageStylesheets", "fullPageTitle", "fullPageUrl" + ]); - let errorCallback = (error: OneNoteApi.RequestError) => { - fullPageScreenshotEvent.setCustomProperty(Log.PropertyName.Custom.CorrelationId, error.responseHeaders[Constants.HeaderValues.correlationId]); - OneNoteApiUtils.logOneNoteApiRequestError(fullPageScreenshotEvent, error); - }; + let dataUrl: string = stored && stored.fullPageFinalImage ? 
stored.fullPageFinalImage : ""; - HttpWithRetries.post(Constants.Urls.fullPageScreenshotUrl, pageInfoContentData, headers, [200, 204], FullPageScreenshotHelper.timeout).then((response: Response) => { - if (response.status === 200) { - response.text().then((responseText: string) => { - try { - resolve(JSON.parse(responseText) as FullPageScreenshotResult); - fullPageScreenshotEvent.setCustomProperty(Log.PropertyName.Custom.FullPageScreenshotContentFound, true); - } catch (e) { - ErrorUtils.createRequestErrorObject(response, OneNoteApi.RequestErrorType.UNABLE_TO_PARSE_RESPONSE, FullPageScreenshotHelper.timeout).then((error) => { - reject(error); - }); - } - }); - } else { - fullPageScreenshotEvent.setCustomProperty(Log.PropertyName.Custom.FullPageScreenshotContentFound, false); - reject(); + if (dataUrl) { + // Convert data URL to Blob + fetch(dataUrl).then(function(r) { return r.blob(); }).then(function(imageBlob) { + let result: FullPageScreenshotResult = { + ImageFormat: signal.format || "jpeg", + ImageBlob: imageBlob, + ImageWidth: signal.cssWidth + }; + + fullPageScreenshotEvent.setCustomProperty(Log.PropertyName.Custom.FullPageScreenshotContentFound, true); + Clipper.logger.logEvent(fullPageScreenshotEvent); + resolve(result); + }, function() { + fullPageScreenshotEvent.setCustomProperty(Log.PropertyName.Custom.FullPageScreenshotContentFound, false); + Clipper.logger.logEvent(fullPageScreenshotEvent); + reject(); + }); + } else { + fullPageScreenshotEvent.setCustomProperty(Log.PropertyName.Custom.FullPageScreenshotContentFound, false); + Clipper.logger.logEvent(fullPageScreenshotEvent); + reject(); + } + }); + } } - }, (error: OneNoteApi.RequestError) => { - errorCallback(error); - reject(); - }).then(() => { - Clipper.logger.logEvent(fullPageScreenshotEvent); - }); + ); }); }); } diff --git a/src/scripts/contentCapture/readability.d.ts b/src/scripts/contentCapture/readability.d.ts new file mode 100644 index 00000000..54a6a896 --- /dev/null +++ 
b/src/scripts/contentCapture/readability.d.ts @@ -0,0 +1,30 @@ +declare module "@mozilla/readability" { + export class Readability { + constructor(doc: Document, options?: { + debug?: boolean; + maxElemsToParse?: number; + nbTopCandidates?: number; + charThreshold?: number; + classesToPreserve?: string[]; + keepClasses?: boolean; + }); + parse(): { + title: string; + content: string; + textContent: string; + length: number; + excerpt: string; + byline: string; + dir: string; + siteName: string; + lang: string; + publishedTime: string; + } | null; + } + + export function isProbablyReaderable(doc: Document, options?: { + minContentLength?: number; + minScore?: number; + visibilityChecker?: (node: Element) => boolean; + }): boolean; +} diff --git a/src/scripts/domParsers/domUtils.ts b/src/scripts/domParsers/domUtils.ts index 7856be96..21492645 100644 --- a/src/scripts/domParsers/domUtils.ts +++ b/src/scripts/domParsers/domUtils.ts @@ -336,6 +336,8 @@ export class DomUtils { */ public static getCleanDomOfCurrentPage(originalDoc: Document): string { let doc = DomUtils.cloneDocument(originalDoc); + DomUtils.inlineHiddenElements(doc, originalDoc); + DomUtils.flattenShadowDomSlots(doc, originalDoc); DomUtils.convertCanvasElementsToImages(doc, originalDoc); DomUtils.addBaseTagIfNecessary(doc, originalDoc.location); @@ -467,6 +469,89 @@ export class DomUtils { return container.insertBefore(spacerNode, referenceNode); } + /** + * Handle elements that were inside web components with shadow DOM. + * cloneNode(true) does NOT clone declarative shadow roots, so slotted content + * (e.g., dropdown panels with slot="dropdown") becomes visible as regular DOM. + * For elements whose shadow-hosted parent hid them via slot CSS, we check the + * original document's computed visibility and inline display:none if hidden. + * + * Also removes