commit 034e9e0d562663aa2b569b9074c91f3a9299c7d7 Author: Sparticuz Date: Mon Sep 26 15:05:08 2022 -0400 Initial Fork from chrome-aws-lambda For repo history, please see: https://github.com/Sparticuz/chrome-aws-lambda/tree/abdc1400082495d838bf05be54afaecf5fadbb37 diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000..a0bd1fb --- /dev/null +++ b/.editorconfig @@ -0,0 +1,19 @@ +# http://editorconfig.org + +root = true + +[*] +charset = utf-8 +end_of_line = lf +indent_size = 2 +indent_style = space +insert_final_newline = true +trim_trailing_whitespace = true + +[*.json] +indent_size = 2 +indent_style = space + +[Makefile] +indent_size = 4 +indent_style = tab diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml new file mode 100644 index 0000000..f129a72 --- /dev/null +++ b/.github/FUNDING.yml @@ -0,0 +1 @@ +github:Sparticuz diff --git a/.github/ISSUE_TEMPLATE/bug-report.md b/.github/ISSUE_TEMPLATE/bug-report.md new file mode 100644 index 0000000..9ef47ea --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug-report.md @@ -0,0 +1,69 @@ +--- +name: Bug Report +about: Standard Bug Report +title: "[BUG]" +labels: bug +--- + + + +## Environment +* `chrome-aws-lambda` Version: +* `puppeteer` / `puppeteer-core` Version: +* OS: +* Node.js Version: +* Lambda / GCF Runtime: + +## Expected Behavior + + + +## Current Behavior + + + +## Steps to Reproduce + + + + + +## Possible Solution + + diff --git a/.github/ISSUE_TEMPLATE/feature-request.md b/.github/ISSUE_TEMPLATE/feature-request.md new file mode 100644 index 0000000..711cdce --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature-request.md @@ -0,0 +1,14 @@ +--- +name: Feature Request +about: Suggest an Idea or Improvement +title: "[REQUEST]" +labels: enhancement +--- + +## What would you like to have implemented? + + + +## Why would it be useful? 
+ + diff --git a/.github/workflows/aws.yml b/.github/workflows/aws.yml new file mode 100644 index 0000000..d6d63e3 --- /dev/null +++ b/.github/workflows/aws.yml @@ -0,0 +1,66 @@ +name: AWS Lambda CI + +on: + push: + branches: [ master ] + pull_request: + branches: [ master ] + +jobs: + build: + name: Build Lambda Layer + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v3 + + - name: Setup Node.js + uses: actions/setup-node@v3 + with: + node-version: 16.x + + - name: Install Packages + run: npm install + + - name: Create Lambda Layer + run: make chrome_aws_lambda.zip + + - name: Upload Layer Artifact + uses: actions/upload-artifact@v3 + with: + name: chrome_aws_lambda + path: chrome_aws_lambda.zip + + execute: + name: Lambda (Node ${{ matrix.version }}.x) + needs: build + runs-on: ubuntu-latest + strategy: + matrix: + event: + - example.com + version: + - 14 + - 16 + steps: + - name: Checkout + uses: actions/checkout@v3 + + - name: Setup Python + uses: actions/setup-python@v3 + with: + python-version: '3.x' + + - name: Setup AWS SAM CLI + uses: aws-actions/setup-sam@v2 + + - name: Download Layer Artifact + uses: actions/download-artifact@v3 + with: + name: chrome_aws_lambda + + - name: Provision Layer + run: unzip chrome_aws_lambda.zip -d _/amazon/code + + - name: Invoke Lambda on SAM + run: sam local invoke --template _/amazon/template.yml --event _/amazon/events/${{ matrix.event }}.json node${{ matrix.version }} 2>&1 | (grep 'Error' && exit 1 || exit 0) diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..f8e4017 --- /dev/null +++ b/.gitignore @@ -0,0 +1,13 @@ +.fonts +.idea +*.log +*.pem +*.pem.pub +*.zip +bin/chromium-*.br +build +node_modules +nodejs +package-lock.json +_/amazon/samconfig.toml +_/amazon/.aws-sam diff --git a/.npmignore b/.npmignore new file mode 100644 index 0000000..24117c6 --- /dev/null +++ b/.npmignore @@ -0,0 +1,5 @@ +_ +.fonts +.idea +*.zip +Dockerfile diff --git a/LICENSE b/LICENSE new file 
mode 100644 index 0000000..eaf38bb --- /dev/null +++ b/LICENSE @@ -0,0 +1,22 @@ +MIT License + +Copyright (c) 2018 Alix Axel +Copyright (c) 2022 Kyle McNally + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..61f0f1c --- /dev/null +++ b/Makefile @@ -0,0 +1,27 @@ +.PHONY: clean + +clean: + rm -rf chrome_aws_lambda.zip _/amazon/code/nodejs + +pretest: + unzip chrome_aws_lambda.zip -d _/amazon/code + +test: + sam local invoke --template _/amazon/template.yml --event _/amazon/events/example.com.json node16 + +.fonts.zip: + zip -9 --filesync --move --recurse-paths .fonts.zip .fonts/ + +%.zip: + npm install --fund=false --package-lock=false + mkdir -p nodejs + npm install --prefix nodejs/ tar-fs@2.1.1 puppeteer-core@17.1.3 --bin-links=false --fund=false --omit=optional --omit=dev --package-lock=false --save=false + npm pack + mkdir -p nodejs/node_modules/@sparticuz/chrome-aws-lambda/ + tar --directory nodejs/node_modules/@sparticuz/chrome-aws-lambda/ --extract --file sparticuz-chrome-aws-lambda-*.tgz --strip-components=1 + npx clean-modules --directory nodejs --include "**/*.d.ts" "**/@types/**" "**/*.@(yaml|yml)" --yes + rm sparticuz-chrome-aws-lambda-*.tgz + mkdir -p $(dir $@) + zip -9 --filesync --move --recurse-paths $@ nodejs + +.DEFAULT_GOAL := chrome_aws_lambda.zip diff --git a/README.md b/README.md new file mode 100644 index 0000000..81a4634 --- /dev/null +++ b/README.md @@ -0,0 +1,379 @@ +# chrome-aws-lambda + +[![@sparticuz/chrome-aws-lambda](https://img.shields.io/npm/v/@sparticuz/chrome-aws-lambda.svg?style=for-the-badge)](https://www.npmjs.com/package/@sparticuz/chrome-aws-lambda) +[![TypeScript](https://img.shields.io/npm/types/chrome-aws-lambda?style=for-the-badge)](https://www.typescriptlang.org/dt/search?search=chrome-aws-lambda) +[![Chromium](https://img.shields.io/badge/chromium-48_MB-brightgreen.svg?style=for-the-badge)](bin/) +[![Donate](https://img.shields.io/badge/donate-paypal-orange.svg?style=for-the-badge)](https://paypal.me/sparticuz) + +Chromium Binary for AWS Lambda and Google Cloud Functions + +### Difference from alixaxel/chrome-aws-lambda + +This fork was born out of 
[alixaxel/chrome-aws-lambda#264](https://github.com/alixaxel/chrome-aws-lambda/pull/264). +The biggest difference, besides the chromium version, is the inclusion of some code from https://github.com/alixaxel/lambdafs, +as well as dropping that as a dependency. Due to some changes in WebGL, the files in bin/swiftshader.tar.br need to +be extracted to `/tmp` instead of `/tmp/swiftshader`. This necessitated changes in lambdafs. + +## Install + +```shell +npm install @sparticuz/chrome-aws-lambda --save-prod +``` + +This will ship with appropriate binary for the latest stable release of [`puppeteer`](https://github.com/GoogleChrome/puppeteer) (usually updated within a few days). + +You also need to install the corresponding version of `puppeteer-core` (or `puppeteer`): + +```shell +npm install puppeteer-core --save-prod +``` + +If you wish to install an older version of Chromium, take a look at [Versioning](https://github.com/Sparticuz/chrome-aws-lambda#versioning). + +## Usage + +This package works with all the currently supported AWS Lambda Node.js runtimes out of the box. 
+ +```javascript +const chromium = require('@sparticuz/chrome-aws-lambda'); + +exports.handler = async (event, context, callback) => { + let result = null; + let browser = null; + + try { + browser = await chromium.puppeteer.launch({ + args: chromium.args, + defaultViewport: chromium.defaultViewport, + executablePath: await chromium.executablePath, + headless: chromium.headless, + ignoreHTTPSErrors: true, + }); + + let page = await browser.newPage(); + + await page.goto(event.url || 'https://example.com'); + + result = await page.title(); + } catch (error) { + return callback(error); + } finally { + if (browser !== null) { + await browser.close(); + } + } + + return callback(null, result); +}; +``` + +### Usage with Playwright + +```javascript +const chromium = require('@sparticuz/chrome-aws-lambda'); +const playwright = require('playwright-core'); + +(async () => { + const browser = await playwright.chromium.launch({ + args: chromium.args, + executablePath: await chromium.executablePath, + headless: chromium.headless, + }); + + // ... + + await browser.close(); +})(); +``` + +You should allocate at least 512 MB of RAM to your Lambda, however 1600 MB (or more) is recommended. + +### Running Locally + +Please refer to the [Local Development Wiki page](https://github.com/alixaxel/chrome-aws-lambda/wiki/HOWTO:-Local-Development) for instructions and troubleshooting. + +## API + +| Method / Property | Returns | Description | +| ----------------- | -------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `font(url)` | `{?Promise}` | Provisions a custom font and returns its basename. | +| `args` | `{!Array}` | Provides a list of recommended additional [Chromium flags](https://github.com/GoogleChrome/chrome-launcher/blob/master/docs/chrome-flags-for-tools.md). | +| `defaultViewport` | `{!Object}` | Returns more sensible default viewport settings. 
| +| `executablePath` | `{?Promise}` | Returns the path the Chromium binary was extracted to. | +| `headless` | `{!boolean}` | Returns `true` if we are running on AWS Lambda or GCF. | +| `puppeteer` | `{!Object}` | Overloads `puppeteer` and returns the resolved package. | + +## Fonts + +The Amazon Linux 2 AWS Lambda runtime is no longer provisioned with any font faces. + +Because of this, this package ships with [Open Sans](https://fonts.google.com/specimen/Open+Sans), which supports the following scripts: + +* Latin +* Greek +* Cyrillic + +To provision additional fonts, simply call the `font()` method with an absolute path or URL: + +```typescript +await chromium.font('/var/task/fonts/NotoColorEmoji.ttf'); +// or +await chromium.font('https://raw.githack.com/googlei18n/noto-emoji/master/fonts/NotoColorEmoji.ttf'); +``` + +> `Noto Color Emoji` (or similar) is needed if you want to [render emojis](https://getemoji.com/). + +> For URLs, it's recommended that you use a CDN, like [raw.githack.com](https://raw.githack.com/) or [gitcdn.xyz](https://gitcdn.xyz/). + +This method should be invoked _before_ launching Chromium. + +> On non-serverless environments, the `font()` method is a no-op to avoid polluting the user space. + +--- + +Alternatively, it's also possible to provision fonts via AWS Lambda Layers. 
+ +Simply create a directory named `.fonts` and place any font faces you want there: + +``` +.fonts +├── NotoColorEmoji.ttf +└── Roboto.ttf +``` + +Afterwards, you just need to ZIP the directory and upload it as a AWS Lambda Layer: + +```shell +zip -9 --filesync --move --recurse-paths .fonts.zip .fonts/ +``` + +## Overloading + +Since version `8.0.0`, it's possible to [overload puppeteer](/typings/chrome-aws-lambda.d.ts) with the following convenient API: + +```typescript +interface Browser { + defaultPage(...hooks: ((page: Page) => Promise)[]) + newPage(...hooks: ((page: Page) => Promise)[]) +} + +interface BrowserContext { + defaultPage(...hooks: ((page: Page) => Promise)[]) + newPage(...hooks: ((page: Page) => Promise)[]) +} + +interface Page { + block(patterns: string[]) + clear(selector: string) + clickAndWaitForNavigation(selector: string, options?: WaitForOptions) + clickAndWaitForRequest(selector: string, predicate: string | RegExp, options?: WaitTimeoutOptions) + clickAndWaitForRequest(selector: string, predicate: ((request: HTTPRequest) => boolean | Promise), options?: WaitTimeoutOptions) + clickAndWaitForResponse(selector: string, predicate: string | RegExp, options?: WaitTimeoutOptions) + clickAndWaitForResponse(selector: string, predicate: ((request: HTTPResponse) => boolean | Promise), options?: WaitTimeoutOptions) + count(selector: string) + exists(selector: string) + fillFormByLabel(selector: string, data: Record) + fillFormByName(selector: string, data: Record) + fillFormBySelector(selector: string, data: Record) + fillFormByXPath(selector: string, data: Record) + number(selector: string, decimal?: string, property?: string) + selectByLabel(selector: string, ...values: string[]) + string(selector: string, property?: string) + waitForInflightRequests(requests?: number, alpha: number, omega: number, options?: WaitTimeoutOptions) + waitForText(predicate: string, options?: WaitTimeoutOptions) + waitUntilVisible(selector: string, options?: 
WaitTimeoutOptions) + waitWhileVisible(selector: string, options?: WaitTimeoutOptions) + withTracing(options: TracingOptions, callback: (page: Page) => Promise) +} + +interface Frame { + clear(selector: string) + clickAndWaitForNavigation(selector: string, options?: WaitForOptions) + clickAndWaitForRequest(selector: string, predicate: string | RegExp, options?: WaitTimeoutOptions) + clickAndWaitForRequest(selector: string, predicate: ((request: HTTPRequest) => boolean | Promise), options?: WaitTimeoutOptions) + clickAndWaitForResponse(selector: string, predicate: string | RegExp, options?: WaitTimeoutOptions) + clickAndWaitForResponse(selector: string, predicate: ((request: HTTPResponse) => boolean | Promise), options?: WaitTimeoutOptions) + count(selector: string) + exists(selector: string) + fillFormByLabel(selector: string, data: Record) + fillFormByName(selector: string, data: Record) + fillFormBySelector(selector: string, data: Record) + fillFormByXPath(selector: string, data: Record) + number(selector: string, decimal?: string, property?: string) + selectByLabel(selector: string, ...values: string[]) + string(selector: string, property?: string) + waitForText(predicate: string, options?: WaitTimeoutOptions) + waitUntilVisible(selector: string, options?: WaitTimeoutOptions) + waitWhileVisible(selector: string, options?: WaitTimeoutOptions) +} + +interface ElementHandle { + clear() + clickAndWaitForNavigation(options?: WaitForOptions) + clickAndWaitForRequest(predicate: string | RegExp, options?: WaitTimeoutOptions) + clickAndWaitForRequest(predicate: ((request: HTTPRequest) => boolean | Promise), options?: WaitTimeoutOptions) + clickAndWaitForResponse(predicate: string | RegExp, options?: WaitTimeoutOptions) + clickAndWaitForResponse(predicate: ((request: HTTPResponse) => boolean | Promise), options?: WaitTimeoutOptions) + fillFormByLabel(data: Record) + fillFormByName(data: Record) + fillFormBySelector(data: Record) + fillFormByXPath(data: Record) + 
getInnerHTML() + getInnerText() + number(decimal?: string, property?: string) + selectByLabel(...values: string[]) + string(property?: string) +} +``` + +To enable this behavior, simply call the `puppeteer` property exposed by this package. + +> Refer to the [TypeScript typings](/typings/chrome-aws-lambda.d.ts) for general documentation. + +## Page Hooks + +When overloaded, you can specify a list of hooks to automatically apply to pages. + +For instance, to remove the `Headless` substring from the user agent: + +```typescript +async function replaceUserAgent(page: Page): Promise { + let value = await page.browser().userAgent(); + + if (value.includes('Headless') === true) { + await page.setUserAgent(value.replace('Headless', '')); + } + + return page; +} +``` + +And then simply pass that page hook to `defaultPage()` or `newPage()`: + +```typescript +let page = await browser.defaultPage(replaceUserAgent); +``` + +> Additional bundled page hooks can be found on [`/build/hooks`](/source/hooks). 
+ +## Versioning + +This package is versioned based on the underlying `puppeteer` minor version: + +| `puppeteer` Version | `chrome-aws-lambda` Version | Chromium Revision | +| ------------------- | --------------------------------------------- | ------------------------------------------------------- | +| `17.1.*` | `npm i @sparticuz/chrome-aws-lambda@~17.1.1` | [`1036745`](https://crrev.com/1036745) (`106.0.5249.0`) | +| `16.1.*` | `npm i @sparticuz/chrome-aws-lambda@~16.1.0` | [`1022525`](https://crrev.com/1022525) (`105.0.5173.0`) | +| `15.5.*` | `npm i @sparticuz/chrome-aws-lambda@~15.5.0` | [`1022525`](https://crrev.com/1022525) (`105.0.5173.0`) | +| `14.4.*` | `npm i @sparticuz/chrome-aws-lambda@~14.4.1` | [`1002410`](https://crrev.com/1002410) (`103.0.5058.0`) | +| `14.3.*` | `npm i @sparticuz/chrome-aws-lambda@~14.3.0` | [`1002410`](https://crrev.com/1002410) (`103.0.5058.0`) | +| `14.2.*` | `npm i @sparticuz/chrome-aws-lambda@~14.2.0` | [`1002410`](https://crrev.com/1002410) (`103.0.5058.0`) | +| `14.1.*` | `npm i @sparticuz/chrome-aws-lambda@~14.1.1` | [`991974`](https://crrev.com/991974) (`102.0.5002.0`) | +| `10.1.*` | `npm i chrome-aws-lambda@~10.1.0` | [`884014`](https://crrev.com/884014) (`92.0.4512.0`) | +| `10.0.*` | `npm i chrome-aws-lambda@~10.0.0` | [`884014`](https://crrev.com/884014) (`92.0.4512.0`) | +| `9.1.*` | `npm i chrome-aws-lambda@~9.1.0` | [`869685`](https://crrev.com/869685) (`91.0.4469.0`) | +| `9.0.*` | `npm i chrome-aws-lambda@~9.0.0` | [`869685`](https://crrev.com/869685) (`91.0.4469.0`) | +| `8.0.*` | `npm i chrome-aws-lambda@~8.0.2` | [`856583`](https://crrev.com/856583) (`90.0.4427.0`) | +| `7.0.*` | `npm i chrome-aws-lambda@~7.0.0` | [`848005`](https://crrev.com/848005) (`90.0.4403.0`) | +| `6.0.*` | `npm i chrome-aws-lambda@~6.0.0` | [`843427`](https://crrev.com/843427) (`89.0.4389.0`) | +| `5.5.*` | `npm i chrome-aws-lambda@~5.5.0` | [`818858`](https://crrev.com/818858) (`88.0.4298.0`) | +| `5.4.*` | `npm i 
chrome-aws-lambda@~5.4.0` | [`809590`](https://crrev.com/809590) (`87.0.4272.0`) | +| `5.3.*` | `npm i chrome-aws-lambda@~5.3.1` | [`800071`](https://crrev.com/800071) (`86.0.4240.0`) | +| `5.2.*` | `npm i chrome-aws-lambda@~5.2.1` | [`782078`](https://crrev.com/782078) (`85.0.4182.0`) | +| `5.1.*` | `npm i chrome-aws-lambda@~5.1.0` | [`768783`](https://crrev.com/768783) (`84.0.4147.0`) | +| `5.0.*` | `npm i chrome-aws-lambda@~5.0.0` | [`756035`](https://crrev.com/756035) (`83.0.4103.0`) | +| `3.1.*` | `npm i chrome-aws-lambda@~3.1.1` | [`756035`](https://crrev.com/756035) (`83.0.4103.0`) | +| `3.0.*` | `npm i chrome-aws-lambda@~3.0.4` | [`737027`](https://crrev.com/737027) (`81.0.4044.0`) | +| `2.1.*` | `npm i chrome-aws-lambda@~2.1.1` | [`722234`](https://crrev.com/722234) (`80.0.3987.0`) | +| `2.0.*` | `npm i chrome-aws-lambda@~2.0.2` | [`705776`](https://crrev.com/705776) (`79.0.3945.0`) | +| `1.20.*` | `npm i chrome-aws-lambda@~1.20.4` | [`686378`](https://crrev.com/686378) (`78.0.3882.0`) | +| `1.19.*` | `npm i chrome-aws-lambda@~1.19.0` | [`674921`](https://crrev.com/674921) (`77.0.3844.0`) | +| `1.18.*` | `npm i chrome-aws-lambda@~1.18.1` | [`672088`](https://crrev.com/672088) (`77.0.3835.0`) | +| `1.18.*` | `npm i chrome-aws-lambda@~1.18.0` | [`669486`](https://crrev.com/669486) (`77.0.3827.0`) | +| `1.17.*` | `npm i chrome-aws-lambda@~1.17.1` | [`662092`](https://crrev.com/662092) (`76.0.3803.0`) | +| `1.16.*` | `npm i chrome-aws-lambda@~1.16.1` | [`656675`](https://crrev.com/656675) (`76.0.3786.0`) | +| `1.15.*` | `npm i chrome-aws-lambda@~1.15.1` | [`650583`](https://crrev.com/650583) (`75.0.3765.0`) | +| `1.14.*` | `npm i chrome-aws-lambda@~1.14.0` | [`641577`](https://crrev.com/641577) (`75.0.3738.0`) | +| `1.13.*` | `npm i chrome-aws-lambda@~1.13.0` | [`637110`](https://crrev.com/637110) (`74.0.3723.0`) | +| `1.12.*` | `npm i chrome-aws-lambda@~1.12.2` | [`624492`](https://crrev.com/624492) (`73.0.3679.0`) | +| `1.11.*` | `npm i 
chrome-aws-lambda@~1.11.2` | [`609904`](https://crrev.com/609904) (`72.0.3618.0`) | +| `1.10.*` | `npm i chrome-aws-lambda@~1.10.1` | [`604907`](https://crrev.com/604907) (`72.0.3582.0`) | +| `1.9.*` | `npm i chrome-aws-lambda@~1.9.1` | [`594312`](https://crrev.com/594312) (`71.0.3563.0`) | +| `1.8.*` | `npm i chrome-aws-lambda@~1.8.0` | [`588429`](https://crrev.com/588429) (`71.0.3542.0`) | +| `1.7.*` | `npm i chrome-aws-lambda@~1.7.0` | [`579032`](https://crrev.com/579032) (`70.0.3508.0`) | +| `1.6.*` | `npm i chrome-aws-lambda@~1.6.3` | [`575458`](https://crrev.com/575458) (`69.0.3494.0`) | +| `1.5.*` | `npm i chrome-aws-lambda@~1.5.0` | [`564778`](https://crrev.com/564778) (`69.0.3452.0`) | +| `1.4.*` | `npm i chrome-aws-lambda@~1.4.0` | [`555668`](https://crrev.com/555668) (`68.0.3419.0`) | +| `1.3.*` | `npm i chrome-aws-lambda@~1.3.0` | [`549031`](https://crrev.com/549031) (`67.0.3391.0`) | +| `1.2.*` | `npm i chrome-aws-lambda@~1.2.0` | [`543305`](https://crrev.com/543305) (`67.0.3372.0`) | +| `1.1.*` | `npm i chrome-aws-lambda@~1.1.0` | [`536395`](https://crrev.com/536395) (`66.0.3347.0`) | +| `1.0.*` | `npm i chrome-aws-lambda@~1.0.0` | [`526987`](https://crrev.com/526987) (`65.0.3312.0`) | +| `0.13.*` | `npm i chrome-aws-lambda@~0.13.0` | [`515411`](https://crrev.com/515411) (`64.0.3264.0`) | + +Patch versions are reserved for bug fixes in `chrome-aws-lambda` and general maintenance. + +## Compiling + +To compile your own version of Chromium check the [Ansible playbook instructions](_/ansible). + +## AWS Lambda Layer + +[Lambda Layers](https://docs.aws.amazon.com/lambda/latest/dg/configuration-layers.html) is a new convenient way to manage common dependencies between different Lambda Functions. 
+ +The following set of (Linux) commands will create a layer of this package alongside `puppeteer-core`: + +```shell +git clone --depth=1 https://github.com/alixaxel/chrome-aws-lambda.git && \ +cd chrome-aws-lambda && \ +make chrome_aws_lambda.zip +``` + +The above will create a `chrome_aws_lambda.zip` file, which can be uploaded to your Layers console. + +Alternatively, you can also download the layer artifact from one of our [CI workflow runs](https://github.com/Sparticuz/chrome-aws-lambda/actions/workflows/aws.yml?query=is%3Asuccess+branch%3Amaster). + +## Google Cloud Functions + +Since version `1.11.2`, it's also possible to use this package on Google/Firebase Cloud Functions. + +According to our benchmarks, it's 40% to 50% faster than using the off-the-shelf `puppeteer` bundle. + +## Compression + +The Chromium binary is compressed using the Brotli algorithm. + +This allows us to get the best compression ratio and faster decompression times. + +| File | Algorithm | Level | Bytes | MiB | % | Inflation | +| ------------- | --------- | ----- | --------- | --------- | ---------- | ---------- | +| `chromium` | - | - | 136964856 | 130.62 | - | - | +| `chromium.gz` | Gzip | 1 | 51662087 | 49.27 | 62.28% | 1.035s | +| `chromium.gz` | Gzip | 2 | 50438352 | 48.10 | 63.17% | 1.016s | +| `chromium.gz` | Gzip | 3 | 49428459 | 47.14 | 63.91% | 0.968s | +| `chromium.gz` | Gzip | 4 | 47873978 | 45.66 | 65.05% | 0.950s | +| `chromium.gz` | Gzip | 5 | 46929422 | 44.76 | 65.74% | 0.938s | +| `chromium.gz` | Gzip | 6 | 46522529 | 44.37 | 66.03% | 0.919s | +| `chromium.gz` | Gzip | 7 | 46406406 | 44.26 | 66.12% | 0.917s | +| `chromium.gz` | Gzip | 8 | 46297917 | 44.15 | 66.20% | 0.916s | +| `chromium.gz` | Gzip | 9 | 46270972 | 44.13 | 66.22% | 0.968s | +| `chromium.gz` | Zopfli | 10 | 45089161 | 43.00 | 67.08% | 0.919s | +| `chromium.gz` | Zopfli | 20 | 45085868 | 43.00 | 67.08% | 0.919s | +| `chromium.gz` | Zopfli | 30 | 45085003 | 43.00 | 67.08% | 0.925s | +| `chromium.gz` | 
Zopfli | 40 | 45084328 | 43.00 | 67.08% | 0.921s | +| `chromium.gz` | Zopfli | 50 | 45084098 | 43.00 | 67.08% | 0.935s | +| `chromium.br` | Brotli | 0 | 55401211 | 52.83 | 59.55% | 0.778s | +| `chromium.br` | Brotli | 1 | 54429523 | 51.91 | 60.26% | 0.757s | +| `chromium.br` | Brotli | 2 | 46436126 | 44.28 | 66.10% | 0.659s | +| `chromium.br` | Brotli | 3 | 46122033 | 43.99 | 66.33% | 0.616s | +| `chromium.br` | Brotli | 4 | 45050239 | 42.96 | 67.11% | 0.692s | +| `chromium.br` | Brotli | 5 | 40813510 | 38.92 | 70.20% | **0.598s** | +| `chromium.br` | Brotli | 6 | 40116951 | 38.26 | 70.71% | 0.601s | +| `chromium.br` | Brotli | 7 | 39302281 | 37.48 | 71.30% | 0.615s | +| `chromium.br` | Brotli | 8 | 39038303 | 37.23 | 71.50% | 0.668s | +| `chromium.br` | Brotli | 9 | 38853994 | 37.05 | 71.63% | 0.673s | +| `chromium.br` | Brotli | 10 | 36090087 | 34.42 | 73.65% | 0.765s | +| `chromium.br` | Brotli | 11 | 34820408 | **33.21** | **74.58%** | 0.712s | + +## License + +MIT diff --git a/_/amazon/code/.gitignore b/_/amazon/code/.gitignore new file mode 100644 index 0000000..e69de29 diff --git a/_/amazon/events/example.com.json b/_/amazon/events/example.com.json new file mode 100644 index 0000000..a150281 --- /dev/null +++ b/_/amazon/events/example.com.json @@ -0,0 +1,23 @@ +[ + { + "url": "https://example.com", + "expected": { + "title": "Example Domain", + "screenshot": "72e10960dcf78c864f3d3635e3beb5be394daf40" + } + }, + { + "url": "https://example.com", + "expected": { + "title": "Example Domain", + "screenshot": "72e10960dcf78c864f3d3635e3beb5be394daf40" + } + }, + { + "url": "https://get.webgl.org", + "expected": { + "remove": "logo-container", + "screenshot": "25ac96a4e44f338f5362c18da2b2823ee599c330" + } + } +] diff --git a/_/amazon/handlers/index.js b/_/amazon/handlers/index.js new file mode 100644 index 0000000..ea23d87 --- /dev/null +++ b/_/amazon/handlers/index.js @@ -0,0 +1,57 @@ +const { ok } = require('assert'); +const { createHash } = require('crypto'); 
+const chromium = require('@sparticuz/chrome-aws-lambda'); + +exports.handler = async (event, context) => { + let browser = null; /* assigned (never redeclared) inside `try` so that `finally` can close the launched browser */ + + try { + browser = await chromium.puppeteer.launch({ + args: chromium.args, + defaultViewport: chromium.defaultViewport, + executablePath: await chromium.executablePath, + headless: chromium.headless, + ignoreHTTPSErrors: true, + }); + + const contexts = [ + browser.defaultBrowserContext(), + ]; + + while (contexts.length < event.length) { + contexts.push(await browser.createIncognitoBrowserContext()); + } + + for (let context of contexts) { + const job = event.shift(); + const page = await context.defaultPage(); + + if (job.hasOwnProperty('url') === true) { + await page.goto(job.url, { waitUntil: ['domcontentloaded', 'load'] }); + + if (job.hasOwnProperty('expected') === true) { + if (job.expected.hasOwnProperty('title') === true) { + ok(await page.title() === job.expected.title, `Title assertion failed.`); + } + + if (job.expected.hasOwnProperty('screenshot') === true) { + if (job.expected.hasOwnProperty('remove') === true ) { + await page.evaluate((selector) => { + document.getElementById(selector).remove(); + }, job.expected.remove); + } + ok(createHash('sha1').update((await page.screenshot()).toString('base64')).digest('hex') === job.expected.screenshot, `Screenshot assertion failed.`); + } + } + } + } + } catch (error) { + throw error.message; + } finally { + if (browser !== null) { + await browser.close(); + } + } + + return true; +}; diff --git a/_/amazon/template.yml b/_/amazon/template.yml new file mode 100644 index 0000000..29b833f --- /dev/null +++ b/_/amazon/template.yml @@ -0,0 +1,39 @@ +AWSTemplateFormatVersion: "2010-09-09" +Transform: AWS::Serverless-2016-10-31 +Globals: + Function: + MemorySize: 2048 + Timeout: 30 + +Resources: + layer: + Type: AWS::Serverless::LayerVersion + Properties: + LayerName: sparticuz-chrome-aws-lambda + ContentUri: code/ + CompatibleRuntimes: + - nodejs14.x + - nodejs16.x + + node14: 
+ Type: AWS::Serverless::Function + Properties: + Layers: + - !Ref layer + Handler: handlers/index.handler + Runtime: nodejs14.x + Policies: + - AWSLambdaBasicExecutionRole + - AWSXRayDaemonWriteAccess + Tracing: Active + node16: + Type: AWS::Serverless::Function + Properties: + Layers: + - !Ref layer + Handler: handlers/index.handler + Runtime: nodejs16.x + Policies: + - AWSLambdaBasicExecutionRole + - AWSXRayDaemonWriteAccess + Tracing: Active diff --git a/_/ansible/Makefile b/_/ansible/Makefile new file mode 100644 index 0000000..323006a --- /dev/null +++ b/_/ansible/Makefile @@ -0,0 +1,9 @@ +.PHONY: ansible chromium + +dependencies: + sudo apt install python3-pip zip + pip install ansible boto boto3 aws-sam-cli + echo "Docker is also required in order to test the package, please install docker or Docker Desktop" + +chromium: + ansible-playbook plays/chromium.yml -i inventory.ini diff --git a/_/ansible/README.md b/_/ansible/README.md new file mode 100644 index 0000000..e98baec --- /dev/null +++ b/_/ansible/README.md @@ -0,0 +1,31 @@ +# Chromium Playbook + +This Ansible playbook will launch an EC2 `c6a.12xlarge` Spot Instance and compile Chromium statically. + +Once the compilation finishes, the binary will be compressed with Brotli and downloaded. + +The whole process usually takes around 1 hour on a `c6a.12xlarge` instance. + +## Chromium Version + +To compile a specific version of Chromium, update the `puppeteer_version` variable in the Ansible inventory, e.g.: + +```shell +puppeteer_version=v1.9.0 +``` + +If not specified, the current `main` will be used. 
+ +## Usage + +```shell +AWS_REGION=us-east-1 \ +AWS_ACCESS_KEY=XXXXXXXXXXXXXXXXXXXX \ +AWS_SECRET_KEY=XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX \ +make chromium +``` + +## Requirements + +- [Ansible](http://docs.ansible.com/ansible/latest/intro_installation.html#latest-releases-via-apt-ubuntu) +- AWS SDK for Python (`boto` and `boto3`) diff --git a/_/ansible/ansible.cfg b/_/ansible/ansible.cfg new file mode 100644 index 0000000..0d2a23d --- /dev/null +++ b/_/ansible/ansible.cfg @@ -0,0 +1,8 @@ +[defaults] +hash_behaviour = merge +host_key_checking = false +retry_files_enabled = false + +[ssh_connection] +ssh_args = -C -o ControlMaster=auto -o ControlPersist=60 -o ServerAliveInterval=30 +pipelining = true diff --git a/_/ansible/inventory.ini b/_/ansible/inventory.ini new file mode 100644 index 0000000..6230e05 --- /dev/null +++ b/_/ansible/inventory.ini @@ -0,0 +1,17 @@ +[localhost] +127.0.0.1 + +[localhost:vars] +ansible_connection=local +ansible_python_interpreter=python +image=ami-0309aede310b9cc1f +region=us-east-1 +instance_size=c6a.8xlarge + +[aws] + +[aws:vars] +ansible_connection=ssh +ansible_python_interpreter=auto_silent +ansible_ssh_private_key_file=ansible.pem +puppeteer_version=v17.1.3 diff --git a/_/ansible/plays/.gclient b/_/ansible/plays/.gclient new file mode 100644 index 0000000..decba8d --- /dev/null +++ b/_/ansible/plays/.gclient @@ -0,0 +1,11 @@ +solutions = [ + { + "name": "src", + "url": "https://chromium.googlesource.com/chromium/src.git", + "managed": False, + "custom_deps": {}, + "custom_vars": { + "checkout_pgo_profiles": True, + }, + }, +] diff --git a/_/ansible/plays/chromium.yml b/_/ansible/plays/chromium.yml new file mode 100644 index 0000000..a959b00 --- /dev/null +++ b/_/ansible/plays/chromium.yml @@ -0,0 +1,383 @@ +--- +- name: Bootstrap AWS + hosts: localhost + gather_facts: false + + tasks: + - name: Creating SSH Key + shell: | + ssh-keygen -b 2048 -t rsa -f ansible.pem -q -N '' && \ + chmod 0600 ansible.pem.pub + args: + chdir: 
.. + creates: ansible.pem + + - name: Creating EC2 Key Pair + amazon.aws.ec2_key: + name: ansible + state: present + region: "{{ region }}" + key_material: "{{ item }}" + with_file: ../ansible.pem.pub + + - name: Creating Security Group + amazon.aws.ec2_group: + name: Chromium + description: SSH Access + state: present + region: "{{ region }}" + rules: + - proto: tcp + to_port: 22 + from_port: 22 + cidr_ip: 0.0.0.0/0 + rules_egress: + - proto: all + cidr_ip: 0.0.0.0/0 + + - name: Request EC2 Instance + amazon.aws.ec2_instance: + count: 1 + ebs_optimized: yes + image: + id: "{{ image }}" + instance_initiated_shutdown_behavior: terminate + instance_type: "{{ instance_size }}" + key_name: ansible + network: + assign_public_ip: yes + delete_on_termination: yes + groups: Chromium + region: "{{ region }}" + security_group: Chromium + state: present + tags: + Name: Chromium + volumes: + - device_name: /dev/xvda + ebs: + delete_on_termination: true + volume_type: io2 + volume_size: 128 + iops: 3000 + register: ec2 + + - name: Registering Host + add_host: + hostname: "{{ item.public_ip_address }}" + groupname: aws + with_items: "{{ ec2.instances }}" + + - name: Waiting for SSH + wait_for: + host: "{{ item.public_ip_address }}" + port: 22 + timeout: 120 + state: started + with_items: "{{ ec2.instances }}" + +- name: AWS + user: ec2-user + hosts: aws + gather_facts: true + environment: + LANG: en_US.UTF-8 + LC_ALL: en_US.UTF-8 + PATH: "{{ ansible_env.PATH }}:/srv/source/depot_tools" + + tasks: + - name: Update system + become: true + become_user: root + shell: | + dnf update --releasever=2022.0.20220831 -y + + - name: Installing Packages + become: true + become_user: root + dnf: + name: + - "@Development Tools" + - alsa-lib-devel + - atk-devel + - bc + - bluez-libs-devel + - bzip2-devel + - cairo-devel + - cmake + - cups-devel + - dbus-devel + - dbus-glib-devel + - dbus-x11 + - expat-devel + - glibc + - glibc-langpack-en + - gperf + - gtk3-devel + - httpd + - 
java-17-amazon-corretto + - libatomic + - libcap-devel + - libjpeg-devel + - libstdc++ + - libXScrnSaver-devel + - libxkbcommon-x11-devel + - mod_ssl + - ncurses-compat-libs + - nspr-devel + - nss-devel + - pam-devel + - pciutils-devel + - perl + - php + - php-cli + - pulseaudio-libs-devel + - python + - python-psutil + - python-setuptools + - ruby + - xorg-x11-server-Xvfb + - zlib + state: latest + update_cache: true + + - name: Checking for Directory Structure + stat: + path: /srv/source/chromium + register: + structure + + - name: Creating Directory Structure + become: true + become_user: root + file: + path: /srv/{{ item }}/chromium + state: directory + group: ec2-user + owner: ec2-user + recurse: true + with_items: + - build + - source + when: structure.stat.exists != true + + - name: Cloning Depot Tools + git: + repo: https://chromium.googlesource.com/chromium/tools/depot_tools.git + dest: /srv/source/depot_tools + force: yes + update: yes + + - name: Upload .gclient + copy: + src: .gclient + dest: /srv/source/chromium/.gclient + owner: ec2-user + group: ec2-user + mode: '0664' + + - name: Checking for Chromium + stat: + path: /srv/source/chromium/.gclient + register: gclient + + - name: Resolving Puppeteer Version + uri: + url: "https://raw.githubusercontent.com/puppeteer/puppeteer/{{ puppeteer_version | default('main') }}/src/revisions.ts" + return_content: yes + register: puppeteer_revisions + + - name: Resolving Chromium Revision from Puppeteer Version + set_fact: + chromium_revision: > + {{ puppeteer_revisions.content | regex_search("chromium: [']([0-9]*)[']", '\1') | first }} + + - name: Resolving Git Commit from Chromium Revision + uri: + url: "https://cr-rev.appspot.com/_ah/api/crrev/v1/redirect/{{ chromium_revision }}" + return_content: yes + register: revision + + - name: Parse Result + set_fact: + gitsha: > + {{ revision.content | regex_search('"git_sha":"([a-zA-Z0-9_]*)"', '\1') | trim }} + + - name: Checking Out Chromium revision + shell: | + 
gclient sync --delete_unversioned_trees --revision {{ gitsha | first }} --with_branch_heads + args: + chdir: /srv/source/chromium + + - name: Run Chromium hooks + shell: | + gclient runhooks + args: + chdir: /srv/source/chromium + + - name: Patching Chromium + lineinfile: + path: "/srv/source/chromium/src/content/browser/{{ item.path }}" + line: "{{ item.line }}" + regexp: "{{ item.regexp }}" + state: present + backrefs: yes + with_items: + - { + path: 'sandbox_ipc_linux.cc', + line: '\1PLOG(WARNING) << "poll"; failed_polls = 0;', + regexp: '^(\s+)PLOG[(]WARNING[)] << "poll";$', + } + - { + path: 'renderer_host/render_process_host_impl.cc', + line: '\1// \2\3', + regexp: '^( )(\s*)(CHECK[(]render_process_host->InSameStoragePartition[(])$', + } + - { + path: 'renderer_host/render_process_host_impl.cc', + line: '\1// \2\3', + regexp: '^( )(\s*)(browser_context->GetStoragePartition[(]site_instance,)$', + } + - { + path: 'renderer_host/render_process_host_impl.cc', + line: '\1// \2\3', + regexp: '^( )(\s*)(false /[*] can_create [*]/[)][)][)];)$', + } + + - name: Creating Build Configuration Directory + file: + mode: 0755 + path: /srv/source/chromium/src/out/Headless + state: directory + + - name: Mounting Build Directory in Memory + become: true + become_user: root + shell: | + mount --types tmpfs --options size=24G,nr_inodes=128k,mode=1777 tmpfs /srv/source/chromium/src/out/Headless + args: + warn: false + + - name: Creating Headless Chromium Configuration + copy: + content: | + import("//build/args/headless.gn") + blink_symbol_level = 0 + dcheck_always_on = false + disable_histogram_support = false + enable_basic_print_dialog = false + enable_basic_printing = true + enable_keystone_registration_framework = false + enable_linux_installer = false + enable_media_remoting = false + enable_one_click_signin = false + ffmpeg_branding = "Chrome" + is_component_build = false + is_debug = false + is_official_build = true + proprietary_codecs = true + symbol_level = 0 + 
target_cpu = "x64" + target_os = "linux" + use_brlapi = 0 + use_sysroot = true + v8_symbol_level = 0 + v8_target_cpu = "x64" + dest: /srv/source/chromium/src/out/Headless/args.gn + + - name: Generating Headless Chromium Configuration + shell: | + gn gen out/Headless + args: + chdir: /srv/source/chromium/src + + - name: Compiling Headless Chromium + shell: | + autoninja -C out/Headless headless_shell + args: + chdir: /srv/source/chromium/src + + - name: Getting Chromium Version + shell: | + sed --regexp-extended 's~[^0-9]+~~g' chrome/VERSION | tr '\n' '.' | sed 's~[.]$~~' + args: + chdir: /srv/source/chromium/src + warn: false + register: version + + - name: Striping Symbols from Chromium Binary + shell: | + strip -o /srv/build/chromium/chromium-{{ version.stdout | quote }} out/Headless/headless_shell + args: + chdir: /srv/source/chromium/src + + - name: Compressing Chromium + shell: | + brotli --best --force {{ item }} + args: + chdir: /srv/build/chromium + with_items: + - "chromium-{{ version.stdout }}" + + - name: Downloading Chromium + fetch: + src: "/srv/build/chromium/{{ item }}" + dest: ../../../bin/ + flat: yes + fail_on_missing: true + with_items: + - "chromium-{{ version.stdout }}.br" + + - name: Archiving OpenGL ES driver + shell: | + tar --directory /srv/source/chromium/src/out/Headless --create --file swiftshader.tar libEGL.so libGLESv2.so libvk_swiftshader.so libvulkan.so.1 vk_swiftshader_icd.json + args: + chdir: /srv/build/chromium + creates: /srv/build/chromium/swiftshader.tar + warn: false + + - name: Compressing OpenGL ES driver + shell: | + brotli --best --force swiftshader.tar + args: + chdir: /srv/build/chromium + creates: /srv/build/chromium/swiftshader.tar.br + + - name: Downloading OpenGL ES driver + fetch: + src: /srv/build/chromium/swiftshader.tar.br + dest: ../../../bin/ + flat: yes + fail_on_missing: true + +- name: Teardown AWS + hosts: localhost + gather_facts: false + + tasks: + - name: Terminating EC2 Instance + 
amazon.aws.ec2_instance: + wait: yes + state: absent + instance_ids: '{{ ec2.instance_ids }}' + region: "{{ region }}" + + - name: Deleting Security Group + amazon.aws.ec2_group: + name: Chromium + state: absent + region: "{{ region }}" + + - name: Deleting EC2 Key Pair + amazon.aws.ec2_key: + name: ansible + state: absent + region: "{{ region }}" + + - name: Deleting SSH Key + file: + path: "../{{ item }}" + state: absent + with_items: + - ansible.pem + - ansible.pem.pub diff --git a/bin/aws.tar.br b/bin/aws.tar.br new file mode 100644 index 0000000..71c8b52 Binary files /dev/null and b/bin/aws.tar.br differ diff --git a/bin/chromium.br b/bin/chromium.br new file mode 100644 index 0000000..a968f4a Binary files /dev/null and b/bin/chromium.br differ diff --git a/bin/swiftshader.tar.br b/bin/swiftshader.tar.br new file mode 100644 index 0000000..0589b94 Binary files /dev/null and b/bin/swiftshader.tar.br differ diff --git a/incrementVersion b/incrementVersion new file mode 100644 index 0000000..adf86cf --- /dev/null +++ b/incrementVersion @@ -0,0 +1,28 @@ +# incrementVersion.sh OLD_VERSION NEW_VERSION +# Example: incrementVersion 16.1.0 16.2.0 + +OLD_VERSION=$1 +NEW_VERSION=$2 + +sed -i "s/$OLD_VERSION/$NEW_VERSION/" _/ansible/inventory.ini +sed -i "s/\"puppeteer-core\": \"$OLD_VERSION\"/\"puppeteer-core\": \"$NEW_VERSION\"/g" package.json +sed -i "s/puppeteer-core@$OLD_VERSION/puppeteer-core@$NEW_VERSION/" Makefile + +echo "Version number incremented $OLD_VERSION -> $NEW_VERSION. 
+ +1) Check for a new version of 'chromium' included with 'puppeteer': + a) https://github.com/puppeteer/puppeteer/blob/main/src/revisions.ts + b) https://cr-rev.appspot.com/_ah/api/crrev/v1/redirect/####### + c) https://omahaproxy.appspot.com/ + +2) If the 'chromium' version has been incremented, please compile a new version of 'chromium': + a) cd _/ansible && make chromium + b) Rename the new chromium binary + +3) Please also update README.md#Versioning + +4) Merge the PR and deploy to npm + a) Test the new version using 'npm run test' + b) Push the PR to Github and merge it + c) Checkout the main branch + d) Run 'npm version $NEW_VERSION' to publish the package to npm." diff --git a/package.json b/package.json new file mode 100644 index 0000000..c0b0593 --- /dev/null +++ b/package.json @@ -0,0 +1,67 @@ +{ + "name": "@sparticuz/chrome-aws-lambda", + "version": "17.1.3", + "author": { + "name": "Kyle McNally" + }, + "license": "MIT", + "description": "Chromium Binary for AWS Lambda and Google Cloud Functions, forked from @alixaxel/chrome-aws-lambda", + "main": "build/index.js", + "types": "build/index.d.ts", + "files": [ + "bin", + "build", + "typings" + ], + "engines": { + "node": ">= 14" + }, + "scripts": { + "test": "make clean && make && make pretest && make test", + "build": "rm -rf build && tsc -p tsconfig.json", + "postversion": "git push && git push --tags && npm publish", + "prepack": "npm run build", + "preversion": "npm run build" + }, + "devDependencies": { + "@types/node": "^16.11.49", + "@types/tar-fs": "^2.0.1", + "clean-modules": "^2.0.6", + "puppeteer-core": "17.1.3", + "typescript": "^4.6.4" + }, + "peerDependencies": { + "puppeteer-core": "17.1.3" + }, + "bugs": { + "url": "https://github.com/Sparticuz/chrome-aws-lambda/issues" + }, + "homepage": "https://github.com/Sparticuz/chrome-aws-lambda", + "repository": { + "type": "git", + "url": "git://github.com/Sparticuz/chrome-aws-lambda.git" + }, + "keywords": [ + "aws", + "browser", + "chrome", + 
"chromium", + "lambda", + "puppeteer", + "serverless" + ], + "prettier": { + "arrowParens": "always", + "bracketSpacing": true, + "jsxBracketSameLine": false, + "printWidth": 140, + "semi": true, + "singleQuote": true, + "tabWidth": 2, + "trailingComma": "es5", + "useTabs": false + }, + "dependencies": { + "tar-fs": "^2.1.1" + } +} diff --git a/source/hooks/adblock.ts b/source/hooks/adblock.ts new file mode 100644 index 0000000..b4f32d1 --- /dev/null +++ b/source/hooks/adblock.ts @@ -0,0 +1,48 @@ +import { promises } from 'fs'; +import { get } from 'https'; +import { Page } from 'puppeteer-core'; + +let adblocker: any = null; + +/** + * Enables ad blocking in page. + * Requires `@cliqz/adblocker-puppeteer` package. + * + * @param page - Page to hook to. + */ +export = async function (page: Page): Promise { + if (adblocker == null) { + const { fullLists, PuppeteerBlocker } = require('@cliqz/adblocker-puppeteer'); + + adblocker = await PuppeteerBlocker.fromLists( + (url: string) => { + return new Promise((resolve, reject) => { + return get(url, (response) => { + if (response.statusCode !== 200) { + return reject(`Unexpected status code: ${response.statusCode}.`); + } + + let result = ''; + + response.on('data', (chunk) => { + result += chunk; + }); + + response.on('end', () => { + return resolve({ text: () => result }); + }); + }); + }); + }, + fullLists, + { enableCompression: false }, + { + path: '/tmp/adblock.bin', + read: promises.readFile, + write: promises.writeFile, + } + ); + } + + return await adblocker.enableBlockingInPage(page).then(() => page); +} diff --git a/source/hooks/agent.ts b/source/hooks/agent.ts new file mode 100644 index 0000000..c21fbcd --- /dev/null +++ b/source/hooks/agent.ts @@ -0,0 +1,16 @@ +import { Page } from 'puppeteer-core'; + +/** + * Removes `Headless` from the User Agent string, if present. + * + * @param page - Page to hook to. 
+ */ +export = async function (page: Page): Promise { + let result = await page.browser().userAgent(); + + if (result.includes('Headless') === true) { + await page.setUserAgent(result.replace('Headless', '')); + } + + return page; +}; diff --git a/source/hooks/chrome.ts b/source/hooks/chrome.ts new file mode 100644 index 0000000..337c12d --- /dev/null +++ b/source/hooks/chrome.ts @@ -0,0 +1,189 @@ +import { Page } from 'puppeteer-core'; +import { Writeable } from '../../typings/chrome-aws-lambda'; + +/** + * Mocks the global `chrome` property to mimic headful Chrome. + * + * @param page - Page to hook to. + */ +export = async function (page: Page): Promise { + const handler = () => { + let alpha = Date.now(); + let delta = Math.floor(500 * Math.random()); + + if ((window as any).chrome === undefined) { + Object.defineProperty(window, 'chrome', { + configurable: false, + enumerable: true, + value: {}, + writable: true, + }); + } + + /** + * https://github.com/berstend/puppeteer-extra/blob/master/packages/puppeteer-extra-plugin-stealth/evasions/chrome.app/index.js + */ + if ((window as any).chrome.app === undefined) { + const InvocationError = (callback: string) => { + /** + * Truncates every line of the stack trace (with the exception of the first), until `search` is found. 
+ */ + const truncateStackTrace = (error: Error, search: string) => { + const stack = error.stack.split('\n'); + const index = stack.findIndex((value: string) => value.trim().startsWith(search)); + + if (index > 0) { + error.stack = [stack[0], ...stack.slice(index + 1)].join('\n'); + } + + return error; + }; + + return truncateStackTrace(new TypeError(`Error in invocation of app.${callback}()`), `at ${callback} (eval at `); + }; + + Object.defineProperty((window as any).chrome, 'app', { + value: { + InstallState: { + DISABLED: 'disabled', + INSTALLED: 'installed', + NOT_INSTALLED: 'not_installed', + }, + RunningState: { + CANNOT_RUN: 'cannot_run', + READY_TO_RUN: 'ready_to_run', + RUNNING: 'running', + }, + get isInstalled() { + return false; + }, + getDetails: function getDetails(): null { + if (arguments.length > 0) { + throw InvocationError('getDetails'); + } + + return null; + }, + getIsInstalled: function getIsInstalled() { + if (arguments.length > 0) { + throw InvocationError('getIsInstalled'); + } + + return false; + }, + runningState: function runningState() { + if (arguments.length > 0) { + throw InvocationError('runningState'); + } + + return 'cannot_run'; + }, + }, + }); + } + + let timing: Partial<PerformanceTiming> = { + navigationStart: alpha + 1 * delta, + domContentLoadedEventEnd: alpha + 4 * delta, + responseStart: alpha + 2 * delta, + loadEventEnd: alpha + 5 * delta, + }; + + if (window.performance?.timing !== undefined) { + timing = window.performance.timing; + } + + /** + * https://github.com/berstend/puppeteer-extra/tree/master/packages/puppeteer-extra-plugin-stealth/evasions/chrome.csi + */ + if ((window as any).chrome.csi === undefined) { + Object.defineProperty((window as any).chrome, 'csi', { + value: function csi() { + return { + startE: timing.navigationStart, + onloadT: timing.domContentLoadedEventEnd, + pageT: parseFloat((Date.now() - timing.navigationStart + Math.random()).toFixed(3)), // toFixed() returns a string; keep pageT a number instead of concatenating + tran: 15, + }; + }, + }); + } + + /** + *
https://github.com/berstend/puppeteer-extra/tree/master/packages/puppeteer-extra-plugin-stealth/evasions/chrome.loadTimes + */ + if ((window as any).chrome.loadTimes === undefined) { + let navigation: Writeable> = { + nextHopProtocol: 'h2', + startTime: 3 * delta, + type: 'other' as any, + }; + + if (typeof window.performance?.getEntriesByType === 'function') { + let entries = { + navigation: window.performance.getEntriesByType('navigation') as PerformanceNavigationTiming[], + paint: window.performance.getEntriesByType('paint') as PerformanceNavigationTiming[], + }; + + if (entries.navigation.length > 0) { + navigation = entries.navigation.shift(); + } + + if (entries.paint.length > 0) { + navigation.startTime = entries.paint.shift().startTime; + } + } + + Object.defineProperty((window as any).chrome, 'loadTimes', { + value: function loadTimes() { + return { + get commitLoadTime() { + return timing.responseStart / 1000; + }, + get connectionInfo() { + return navigation.nextHopProtocol; + }, + get finishDocumentLoadTime() { + return timing.domContentLoadedEventEnd / 1000; + }, + get finishLoadTime() { + return timing.loadEventEnd / 1000; + }, + get firstPaintAfterLoadTime() { + return 0; + }, + get firstPaintTime() { + return parseFloat(((navigation.startTime + (window.performance?.timeOrigin ?? timing.navigationStart)) / 1000).toFixed(3)); + }, + get navigationType() { + return navigation.type; + }, + get npnNegotiatedProtocol() { + return ['h2', 'hq'].includes(navigation.nextHopProtocol) ? 
navigation.nextHopProtocol : 'unknown'; + }, + get requestTime() { + return timing.navigationStart / 1000; + }, + get startLoadTime() { + return timing.navigationStart / 1000; + }, + get wasAlternateProtocolAvailable() { + return false; + }, + get wasFetchedViaSpdy() { + return ['h2', 'hq'].includes(navigation.nextHopProtocol); + }, + get wasNpnNegotiated() { + return ['h2', 'hq'].includes(navigation.nextHopProtocol); + }, + }; + }, + }); + }; + } + + await page.evaluate(handler); + await page.evaluateOnNewDocument(handler); + + return page; +} diff --git a/source/hooks/languages.ts b/source/hooks/languages.ts new file mode 100644 index 0000000..3e5fb30 --- /dev/null +++ b/source/hooks/languages.ts @@ -0,0 +1,23 @@ +import { Page } from 'puppeteer-core'; + +/** + * Emulates `en-US` language. + * + * @param page - Page to hook to. + */ +export = async function (page: Page): Promise { + const handler = () => { + Object.defineProperty(Object.getPrototypeOf(navigator), 'language', { + get: () => 'en-US', + }); + + Object.defineProperty(Object.getPrototypeOf(navigator), 'languages', { + get: () => ['en-US', 'en'], + }); + }; + + await page.evaluate(handler); + await page.evaluateOnNewDocument(handler); + + return page; +} diff --git a/source/hooks/permissions.ts b/source/hooks/permissions.ts new file mode 100644 index 0000000..28101cf --- /dev/null +++ b/source/hooks/permissions.ts @@ -0,0 +1,28 @@ +import { Page } from 'puppeteer-core'; + +/** + * Emulates `denied` state for all permission queries. + * + * @param page - Page to hook to. 
+ */ +export = async function (page: Page): Promise<Page> { + const handler = () => { + let query = window.navigator.permissions.query; + + (Permissions as any).prototype.query = function (parameters: PermissionDescriptor) { + if (parameters?.name?.length > 0) { + return Promise.resolve({ + onchange: null, + state: 'denied', + }); + } + + return query.call(this, parameters); // the native query() needs its Permissions receiver; a bare call fails with "Illegal invocation" + }; + }; + + await page.evaluate(handler); + await page.evaluateOnNewDocument(handler); + + return page; +} diff --git a/source/hooks/timezone.ts b/source/hooks/timezone.ts new file mode 100644 index 0000000..760f371 --- /dev/null +++ b/source/hooks/timezone.ts @@ -0,0 +1,10 @@ +import { Page } from 'puppeteer-core'; + +/** + * Emulates UTC timezone. + * + * @param page - Page to hook to. + */ +export = function (page: Page): Promise<Page> { + return page.emulateTimezone('UTC').then(() => page); +} diff --git a/source/hooks/webdriver.ts b/source/hooks/webdriver.ts new file mode 100644 index 0000000..a2b2c7b --- /dev/null +++ b/source/hooks/webdriver.ts @@ -0,0 +1,19 @@ +import { Page } from 'puppeteer-core'; + +/** + * Removes global `webdriver` property to mimic headful Chrome. + * + * @param page - Page to hook to. + */ +export = async function (page: Page): Promise<Page> { + const handler = () => { + Object.defineProperty(Object.getPrototypeOf(navigator), 'webdriver', { + get: () => false, + }); + }; + + await page.evaluate(handler); + await page.evaluateOnNewDocument(handler); + + return page; +} diff --git a/source/hooks/window.ts b/source/hooks/window.ts new file mode 100644 index 0000000..37a3401 --- /dev/null +++ b/source/hooks/window.ts @@ -0,0 +1,47 @@ +import { Page } from 'puppeteer-core'; + +/** + * Patches window outer dimensions to mimic headful Chrome. + * + * @param page - Page to hook to.
+ */ +export = async function (page: Page): Promise { + const handler = () => { + if (window.outerWidth === 0) { + Object.defineProperty(window, 'outerWidth', { + get: () => screen.availWidth, + }); + } + + if (window.outerHeight === 0) { + Object.defineProperty(window, 'outerHeight', { + get: () => screen.availHeight, + }); + } + + if (window.screenX === 0) { + Object.defineProperty(window, 'screenX', { + get: () => screen.width - screen.availWidth, + }); + + Object.defineProperty(window, 'screenLeft', { + get: () => screenX, + }); + } + + if (window.screenY === 0) { + Object.defineProperty(window, 'screenY', { + get: () => screen.height - screen.availHeight, + }); + + Object.defineProperty(window, 'screenTop', { + get: () => screenY, + }); + } + }; + + await page.evaluate(handler); + await page.evaluateOnNewDocument(handler); + + return page; +} diff --git a/source/index.ts b/source/index.ts new file mode 100644 index 0000000..e222c69 --- /dev/null +++ b/source/index.ts @@ -0,0 +1,219 @@ +/// + +import { access, createWriteStream, existsSync, mkdirSync, readdirSync, symlink, unlinkSync } from 'fs'; +import { IncomingMessage } from 'http'; +import LambdaFS from './lambdafs'; +import { join } from 'path'; +import { PuppeteerNode, Viewport } from 'puppeteer-core'; +import { URL } from 'url'; + +if (/^AWS_Lambda_nodejs(?:10|12|14|16)[.]x$/.test(process.env.AWS_EXECUTION_ENV) === true) { + if (process.env.FONTCONFIG_PATH === undefined) { + process.env.FONTCONFIG_PATH = '/tmp/aws'; + } + + if (process.env.LD_LIBRARY_PATH === undefined) { + process.env.LD_LIBRARY_PATH = '/tmp/aws/lib'; + } else if (process.env.LD_LIBRARY_PATH.startsWith('/tmp/aws/lib') !== true) { + process.env.LD_LIBRARY_PATH = [...new Set(['/tmp/aws/lib', ...process.env.LD_LIBRARY_PATH.split(':')])].join(':'); + } +} + +class Chromium { + /** + * Downloads or symlinks a custom font and returns its basename, patching the environment so that Chromium can find it. 
+ * If not running on AWS Lambda or Google Cloud Functions, the returned promise resolves to `null` instead. + */ + static font(input: string): Promise<string> { + if (Chromium.headless !== true) { + return Promise.resolve(null); // always hand back a promise, as `executablePath` does + } + + if (process.env.HOME === undefined) { + process.env.HOME = '/tmp'; + } + + if (existsSync(`${process.env.HOME}/.fonts`) !== true) { + mkdirSync(`${process.env.HOME}/.fonts`); + } + + return new Promise((resolve, reject) => { + if (/^https?:[/][/]/i.test(input) !== true) { + input = `file://${input}`; + } + + const url = new URL(input); + const output = `${process.env.HOME}/.fonts/${url.pathname.split('/').pop()}`; + + if (existsSync(output) === true) { + return resolve(output.split('/').pop()); + } + + if (url.protocol === 'file:') { + access(url.pathname, (error) => { + if (error != null) { + return reject(error); + } + + symlink(url.pathname, output, (error) => { + return error != null ? reject(error) : resolve(url.pathname.split('/').pop()); + }); + }); + } else { + let handler = url.protocol === 'http:' ? require('http').get : require('https').get; + + handler(input, (response: IncomingMessage) => { + if (response.statusCode !== 200) { + return reject(`Unexpected status code: ${response.statusCode}.`); + } + + const stream = createWriteStream(output); + + stream.once('error', (error) => { + return reject(error); + }); + + response.on('data', (chunk) => { + stream.write(chunk); + }); + + response.once('end', () => { + stream.end(() => { + return resolve(url.pathname.split('/').pop()); + }); + }); + }).once('error', reject); // DNS/connection failures are emitted on the request; without a listener they are uncaught and the promise never settles + } + }); + } + + /** + * Returns a list of additional Chromium flags recommended for serverless environments. + * The canonical list of flags can be found on https://peter.sh/experiments/chromium-command-line-switches/.
+ */ + static get args(): string[] { + const result = [ + '--allow-running-insecure-content', // https://source.chromium.org/search?q=lang:cpp+symbol:kAllowRunningInsecureContent&ss=chromium + '--autoplay-policy=user-gesture-required', // https://source.chromium.org/search?q=lang:cpp+symbol:kAutoplayPolicy&ss=chromium + '--disable-background-timer-throttling', + '--disable-component-update', // https://source.chromium.org/search?q=lang:cpp+symbol:kDisableComponentUpdate&ss=chromium + '--disable-domain-reliability', // https://source.chromium.org/search?q=lang:cpp+symbol:kDisableDomainReliability&ss=chromium + '--disable-features=AudioServiceOutOfProcess,IsolateOrigins,site-per-process', // https://source.chromium.org/search?q=file:content_features.cc&ss=chromium + '--disable-ipc-flooding-protection', + '--disable-print-preview', // https://source.chromium.org/search?q=lang:cpp+symbol:kDisablePrintPreview&ss=chromium + '--disable-dev-shm-usage', + '--disable-setuid-sandbox', // https://source.chromium.org/search?q=lang:cpp+symbol:kDisableSetuidSandbox&ss=chromium + '--disable-site-isolation-trials', // https://source.chromium.org/search?q=lang:cpp+symbol:kDisableSiteIsolation&ss=chromium + '--disable-speech-api', // https://source.chromium.org/search?q=lang:cpp+symbol:kDisableSpeechAPI&ss=chromium + '--disable-web-security', // https://source.chromium.org/search?q=lang:cpp+symbol:kDisableWebSecurity&ss=chromium + '--disk-cache-size=33554432', // https://source.chromium.org/search?q=lang:cpp+symbol:kDiskCacheSize&ss=chromium + '--enable-features=SharedArrayBuffer', // https://source.chromium.org/search?q=file:content_features.cc&ss=chromium + '--hide-scrollbars', // https://source.chromium.org/search?q=lang:cpp+symbol:kHideScrollbars&ss=chromium + '--ignore-gpu-blocklist', // https://source.chromium.org/search?q=lang:cpp+symbol:kIgnoreGpuBlocklist&ss=chromium + '--in-process-gpu', // https://source.chromium.org/search?q=lang:cpp+symbol:kInProcessGPU&ss=chromium + 
'--mute-audio', // https://source.chromium.org/search?q=lang:cpp+symbol:kMuteAudio&ss=chromium + '--no-default-browser-check', // https://source.chromium.org/search?q=lang:cpp+symbol:kNoDefaultBrowserCheck&ss=chromium + '--no-first-run', + '--no-pings', // https://source.chromium.org/search?q=lang:cpp+symbol:kNoPings&ss=chromium + '--no-sandbox', // https://source.chromium.org/search?q=lang:cpp+symbol:kNoSandbox&ss=chromium + '--no-zygote', // https://source.chromium.org/search?q=lang:cpp+symbol:kNoZygote&ss=chromium + '--use-gl=angle', // https://chromium.googlesource.com/chromium/src/+/main/docs/gpu/swiftshader.md + '--use-angle=swiftshader', // https://chromium.googlesource.com/chromium/src/+/main/docs/gpu/swiftshader.md + '--window-size=1920,1080', // https://source.chromium.org/search?q=lang:cpp+symbol:kWindowSize&ss=chromium + ]; + + if (Chromium.headless === true) { + result.push('--single-process'); // https://source.chromium.org/search?q=lang:cpp+symbol:kSingleProcess&ss=chromium + } else { + result.push('--start-maximized'); // https://source.chromium.org/search?q=lang:cpp+symbol:kStartMaximized&ss=chromium + } + + return result; + } + + /** + * Returns sensible default viewport settings. + */ + static get defaultViewport(): Required { + return { + deviceScaleFactor: 1, + hasTouch: false, + height: 1080, + isLandscape: true, + isMobile: false, + width: 1920, + }; + } + + /** + * Inflates the current version of Chromium and returns the path to the binary. + * If not running on AWS Lambda nor Google Cloud Functions, `null` is returned instead. 
+ */ + static get executablePath(): Promise { + if (Chromium.headless !== true) { + return Promise.resolve(null); + } + + if (existsSync('/tmp/chromium') === true) { + for (const file of readdirSync('/tmp')) { + if (file.startsWith('core.chromium') === true) { + unlinkSync(`/tmp/${file}`); + } + } + + return Promise.resolve('/tmp/chromium'); + } + + const input = join(__dirname, '..', 'bin'); + const promises = [ + LambdaFS.inflate(`${input}/chromium.br`), + LambdaFS.inflate(`${input}/swiftshader.tar.br`), + ]; + + if (/^AWS_Lambda_nodejs(?:10|12|14|16)[.]x$/.test(process.env.AWS_EXECUTION_ENV) === true) { + promises.push(LambdaFS.inflate(`${input}/aws.tar.br`)); + } + + return Promise.all(promises).then((result) => result.shift()); + } + + /** + * Returns a boolean indicating if we are running on AWS Lambda or Google Cloud Functions. + * False is returned if Serverless environment variables `IS_LOCAL` or `IS_OFFLINE` are set. + */ + static get headless() { + if (process.env.IS_LOCAL !== undefined || process.env.IS_OFFLINE !== undefined) { + return false; + } + + const environments = [ + 'AWS_LAMBDA_FUNCTION_NAME', + 'FUNCTION_NAME', + 'FUNCTION_TARGET', + 'FUNCTIONS_EMULATOR', + ]; + + return environments.some((key) => process.env[key] !== undefined); + } + + /** + * Overloads puppeteer with useful methods and returns the resolved package. 
+ */ + static get puppeteer(): PuppeteerNode { + for (const overload of ['Browser', 'BrowserContext', 'ElementHandle', 'Frame', 'Page']) { + require(`${__dirname}/puppeteer/lib/${overload}`); + } + + try { + return require('puppeteer'); + } catch (error: any) { + if (error.code !== 'MODULE_NOT_FOUND') { + throw error; + } + + return require('puppeteer-core'); + } + } +} + +export = Chromium; diff --git a/source/lambdafs.ts b/source/lambdafs.ts new file mode 100644 index 0000000..f0cb1f8 --- /dev/null +++ b/source/lambdafs.ts @@ -0,0 +1,61 @@ +import { createReadStream, createWriteStream, existsSync } from 'fs'; +import { tmpdir } from 'os'; +import { basename, join } from 'path'; +import { extract } from 'tar-fs'; +import { createBrotliDecompress, createUnzip } from 'zlib'; + +class LambdaFS { + /** + * Decompresses a (tarballed) Brotli or Gzip compressed file and returns the path to the decompressed file/folder. + * + * @param filePath Path of the file to decompress. + */ + static inflate(filePath: string): Promise { + const output = filePath.includes("swiftshader") ? 
tmpdir() : join(tmpdir(), basename(filePath).replace(/[.](?:t(?:ar(?:[.](?:br|gz))?|br|gz)|br|gz)$/i, '')); + + return new Promise((resolve, reject) => { + if (filePath.includes("swiftshader")) { + if (existsSync(`${output}/libGLESv2.so`)) { + return resolve(output); + } + } else { + if (existsSync(output) === true) { + return resolve(output); + } + } + + let source = createReadStream(filePath, { highWaterMark: 2 ** 23 }); + let target = null; + + if (/[.](?:t(?:ar(?:[.](?:br|gz))?|br|gz))$/i.test(filePath) === true) { + target = extract(output); + + target.once('finish', () => { + return resolve(output); + }); + } else { + target = createWriteStream(output, { mode: 0o700 }); + } + + source.once('error', (error: Error) => { + return reject(error); + }); + + target.once('error', (error: Error) => { + return reject(error); + }); + + target.once('close', () => { + return resolve(output); + }); + + if (/(?:br|gz)$/i.test(filePath) === true) { + source.pipe(/br$/i.test(filePath) ? createBrotliDecompress({ chunkSize: 2 ** 21 }) : createUnzip({ chunkSize: 2 ** 21 })).pipe(target); + } else { + source.pipe(target); + } + }); + } +} + +export = LambdaFS; diff --git a/source/puppeteer/lib/Browser.ts b/source/puppeteer/lib/Browser.ts new file mode 100644 index 0000000..6ed3eff --- /dev/null +++ b/source/puppeteer/lib/Browser.ts @@ -0,0 +1,43 @@ +import { Browser, Page } from 'puppeteer-core'; +import { Hook, Prototype } from '../../../typings/chrome-aws-lambda'; + +let Super: Prototype = null; + +try { + Super = require('puppeteer/lib/cjs/puppeteer/common/Browser.js').Browser; +} catch (error) { + Super = require('puppeteer-core/lib/cjs/puppeteer/common/Browser.js').Browser; +} + +Super.prototype.defaultPage = async function (...hooks: Hook[]) { + let page: Page = null; + let pages: Page[] = await this.pages(); + + if (pages.length === 0) { + pages = [await this.newPage()]; + } + + page = pages.shift(); + + if (hooks != null && Array.isArray(hooks) === true) { + for (let hook 
of hooks) { + page = await hook(page); + } + } + + return page; +}; + +let newPage: any = Super.prototype.newPage; + +Super.prototype.newPage = async function (...hooks: Hook[]) { + let page: Page = await newPage.apply(this, arguments); + + if (hooks != null && Array.isArray(hooks) === true) { + for (let hook of hooks) { + page = await hook(page); + } + } + + return page; +}; diff --git a/source/puppeteer/lib/BrowserContext.ts b/source/puppeteer/lib/BrowserContext.ts new file mode 100644 index 0000000..774ae4a --- /dev/null +++ b/source/puppeteer/lib/BrowserContext.ts @@ -0,0 +1,43 @@ +import { BrowserContext, Page } from 'puppeteer-core'; +import { Hook, Prototype } from '../../../typings/chrome-aws-lambda'; + +let Super: Prototype = null; + +try { + Super = require('puppeteer/lib/cjs/puppeteer/common/Browser.js').BrowserContext; +} catch (error) { + Super = require('puppeteer-core/lib/cjs/puppeteer/common/Browser.js').BrowserContext; +} + +Super.prototype.defaultPage = async function (...hooks: Hook[]) { + let page: Page = null; + let pages: Page[] = await this.pages(); + + if (pages.length === 0) { + pages = [await this.newPage()]; + } + + page = pages.shift(); + + if (hooks != null && Array.isArray(hooks) === true) { + for (let hook of hooks) { + page = await hook(page); + } + } + + return page; +}; + +let newPage: any = Super.prototype.newPage; + +Super.prototype.newPage = async function (...hooks: Hook[]) { + let page: Page = await newPage.apply(this, arguments); + + if (hooks != null && Array.isArray(hooks) === true) { + for (let hook of hooks) { + page = await hook(page); + } + } + + return page; +}; diff --git a/source/puppeteer/lib/ElementHandle.ts b/source/puppeteer/lib/ElementHandle.ts new file mode 100644 index 0000000..aa75fcf --- /dev/null +++ b/source/puppeteer/lib/ElementHandle.ts @@ -0,0 +1,557 @@ +import { ElementHandle, EvaluateFunc, HTTPRequest, HTTPResponse, Page, WaitForOptions, WaitTimeoutOptions } from 'puppeteer-core'; +import { Prototype 
} from '../../../typings/chrome-aws-lambda'; + +let Super: Prototype = null; + +try { + Super = require('puppeteer/lib/cjs/puppeteer/common/ElementHandle.js').ElementHandle; +} catch (error) { + Super = require('puppeteer-core/lib/cjs/puppeteer/common/ElementHandle.js').ElementHandle; +} + +Super.prototype.clear = function () { + return this.click({ clickCount: 3 }).then(() => this.press('Backspace')); +}; + +Super.prototype.clickAndWaitForNavigation = function (options?: WaitForOptions) { + options = options ?? { + waitUntil: [ + 'load', + ], + }; + + let promises: [Promise, Promise] = [ + ((this as any)._page as Page).waitForNavigation(options), + this.click(), + ]; + + return Promise.all(promises).then((value) => value.shift() as HTTPResponse); +}; + +Super.prototype.clickAndWaitForRequest = function (predicate: string | RegExp | ((request: HTTPRequest) => boolean | Promise), options?: WaitTimeoutOptions) { + let callback = (request: HTTPRequest) => { + let url = request.url(); + + if (typeof predicate === 'string' && predicate.includes('*') === true) { + predicate = new RegExp(predicate.replace(/[-\/\\^$+?.()|[\]{}]/g, '\\$&').replace(/[*]+/g, '.*?'), 'g'); + } + + if (predicate instanceof RegExp) { + return predicate.test(url); + } + + return predicate === url; + }; + + let promises: [Promise, Promise] = [ + ((this as any)._page as Page).waitForRequest((typeof predicate === 'function') ? 
predicate : callback, options), + this.click(), + ]; + + return Promise.all(promises).then((value) => value.shift() as HTTPRequest); +}; + +Super.prototype.clickAndWaitForResponse = function (predicate: string | RegExp | ((request: HTTPResponse) => boolean | Promise), options?: WaitTimeoutOptions) { + let callback = (request: HTTPResponse) => { + let url = request.url(); + + if (typeof predicate === 'string' && predicate.includes('*') === true) { + predicate = new RegExp(predicate.replace(/[-\/\\^$+?.()|[\]{}]/g, '\\$&').replace(/[*]+/g, '.*?'), 'g'); + } + + if (predicate instanceof RegExp) { + return predicate.test(url); + } + + return predicate === url; + }; + + let promises: [Promise, Promise] = [ + ((this as any)._page as Page).waitForResponse((typeof predicate === 'function') ? predicate : callback, options), + this.click(), + ]; + + return Promise.all(promises).then((value) => value.shift() as HTTPResponse); +}; + +Super.prototype.fillFormByLabel = function >(data: T) { + let callback = (node: HTMLFormElement, data: T) => { + if (node.nodeName.toLowerCase() !== 'form') { + throw new Error('Element is not a
element.'); + } + + let result: Record = {}; + + for (let [key, value] of Object.entries(data)) { + let selector = [ + `id(string(//label[normalize-space(.) = "${key}"]/@for))`, + `//label[normalize-space(.) = "${key}"]//*[self::input or self::select or self::textarea]`, + ].join(' | '); + + if (result.hasOwnProperty(key) !== true) { + result[key] = []; + } + + let element: Node = null; + let elements: HTMLInputElement[] = []; + let iterator = document.evaluate(selector, node, null, XPathResult.ORDERED_NODE_ITERATOR_TYPE, null); + + while ((element = iterator.iterateNext()) != null) { + elements.push(element as HTMLInputElement); + } + + if (elements.length === 0) { + throw new Error(`No elements match the selector '${selector}' for '${key}'.`); + } + + let type = (elements[0].getAttribute('type') || elements[0].nodeName).toLowerCase(); + let values: (boolean | string)[] = (Array.isArray(value) === true) ? value as (boolean | string)[] : [value] as (boolean | string)[]; + + if (type === 'file') { + throw new Error(`Input element of type 'file' is not supported.`); + } + + for (let element of elements) { + try { + element.focus(); + element.dispatchEvent(new Event('focus')); + } catch (error) { + } + + if (type === 'select') { + element.value = undefined; + + for (let index of ['value', 'label'] as ['value', 'label']) { + if (result[key].length > 0) { + break; + } + + for (let option of Array.from((element as unknown as HTMLSelectElement).options)) { + option.selected = values.includes(option[index]); + + if (option.selected === true) { + result[key].push(option.value); + + if (element.multiple !== true) { + break; + } + } + } + } + } else if (type === 'checkbox' || type === 'radio') { + element.checked = (value === true) || values.includes(element.value); + + if (element.checked === true) { + result[key].push(element.value); + } + } else if (typeof value === 'string') { + if (element.isContentEditable === true) { + result[key].push(element.textContent = value); + } 
else { + result[key].push(element.value = value); + } + } + + for (let trigger of ['input', 'change']) { + element.dispatchEvent(new Event(trigger, { 'bubbles': true })); + } + + try { + element.blur(); + element.dispatchEvent(new Event('blur')); + } catch (error) { + } + + if (type === 'checkbox' || type === 'radio') { + break; + } + } + } + + return result; + }; + + return this.evaluate(callback as unknown as EvaluateFunc<[ElementHandle, T]>, data) as any; +}; + +Super.prototype.fillFormByName = function >(data: T) { + let callback = (node: HTMLFormElement, data: T, heuristic: 'css' | 'label' | 'name' | 'xpath' = 'css') => { + if (node.nodeName.toLowerCase() !== 'form') { + throw new Error('Element is not a element.'); + } + + let result: Record = {}; + + for (let [key, value] of Object.entries(data)) { + let selector = `[name="${key}"]`; + + if (result.hasOwnProperty(key) !== true) { + result[key] = []; + } + + let elements: HTMLInputElement[] = Array.from(node.querySelectorAll(selector)); + + if (elements.length === 0) { + throw new Error(`No elements match the selector '${selector}' for '${key}'.`); + } + + let type = (elements[0].getAttribute('type') || elements[0].nodeName).toLowerCase(); + let values: (boolean | string)[] = (Array.isArray(value) === true) ? 
value as (boolean | string)[] : [value] as (boolean | string)[]; + + if (type === 'file') { + throw new Error(`Input element of type 'file' is not supported.`); + } + + for (let element of elements) { + try { + element.focus(); + element.dispatchEvent(new Event('focus')); + } catch (error) { + } + + if (type === 'select') { + element.value = undefined; + + for (let index of ['value', 'label'] as ['value', 'label']) { + if (result[key].length > 0) { + break; + } + + for (let option of Array.from((element as unknown as HTMLSelectElement).options)) { + option.selected = values.includes(option[index]); + + if (option.selected === true) { + result[key].push(option.value); + + if (element.multiple !== true) { + break; + } + } + } + } + } else if (type === 'checkbox' || type === 'radio') { + element.checked = (value === true) || values.includes(element.value); + + if (element.checked === true) { + result[key].push(element.value); + } + } else if (typeof value === 'string') { + if (element.isContentEditable === true) { + result[key].push(element.textContent = value); + } else { + result[key].push(element.value = value); + } + } + + for (let trigger of ['input', 'change']) { + element.dispatchEvent(new Event(trigger, { 'bubbles': true })); + } + + try { + element.blur(); + element.dispatchEvent(new Event('blur')); + } catch (error) { + } + + if (type === 'checkbox' || type === 'radio') { + break; + } + } + } + + return result; + }; + + return this.evaluate(callback as unknown as EvaluateFunc<[ElementHandle, T]>, data) as any; +}; + +Super.prototype.fillFormBySelector = function >(data: T) { + let callback = (node: HTMLFormElement, data: T, heuristic: 'css' | 'label' | 'name' | 'xpath' = 'css') => { + if (node.nodeName.toLowerCase() !== 'form') { + throw new Error('Element is not a element.'); + } + + let result: Record = {}; + + for (let [key, value] of Object.entries(data)) { + let selector = key; + + if (result.hasOwnProperty(key) !== true) { + result[key] = []; + } + + 
let elements: HTMLInputElement[] = Array.from(node.querySelectorAll(selector)); + + if (elements.length === 0) { + throw new Error(`No elements match the selector '${selector}' for '${key}'.`); + } + + let type = (elements[0].getAttribute('type') || elements[0].nodeName).toLowerCase(); + let values: (boolean | string)[] = (Array.isArray(value) === true) ? value as (boolean | string)[] : [value] as (boolean | string)[]; + + if (type === 'file') { + throw new Error(`Input element of type 'file' is not supported.`); + } + + for (let element of elements) { + try { + element.focus(); + element.dispatchEvent(new Event('focus')); + } catch (error) { + } + + if (type === 'select') { + element.value = undefined; + + for (let index of ['value', 'label'] as ['value', 'label']) { + if (result[key].length > 0) { + break; + } + + for (let option of Array.from((element as unknown as HTMLSelectElement).options)) { + option.selected = values.includes(option[index]); + + if (option.selected === true) { + result[key].push(option.value); + + if (element.multiple !== true) { + break; + } + } + } + } + } else if (type === 'checkbox' || type === 'radio') { + element.checked = (value === true) || values.includes(element.value); + + if (element.checked === true) { + result[key].push(element.value); + } + } else if (typeof value === 'string') { + if (element.isContentEditable === true) { + result[key].push(element.textContent = value); + } else { + result[key].push(element.value = value); + } + } + + for (let trigger of ['input', 'change']) { + element.dispatchEvent(new Event(trigger, { 'bubbles': true })); + } + + try { + element.blur(); + element.dispatchEvent(new Event('blur')); + } catch (error) { + } + + if (type === 'checkbox' || type === 'radio') { + break; + } + } + } + + return result; + }; + + return this.evaluate(callback as unknown as EvaluateFunc<[ElementHandle, T]>, data) as any; +}; + +Super.prototype.fillFormByXPath = function >(data: T) { + let callback = (node: 
HTMLFormElement, data: T) => { + if (node.nodeName.toLowerCase() !== 'form') { + throw new Error('Element is not a element.'); + } + + let result: Record = {}; + + for (let [key, value] of Object.entries(data)) { + let selector = key; + + if (result.hasOwnProperty(key) !== true) { + result[key] = []; + } + + let element: Node = null; + let elements: HTMLInputElement[] = []; + let iterator = document.evaluate(selector, node, null, XPathResult.ORDERED_NODE_ITERATOR_TYPE, null); + + while ((element = iterator.iterateNext()) != null) { + elements.push(element as HTMLInputElement); + } + + if (elements.length === 0) { + throw new Error(`No elements match the selector '${selector}' for '${key}'.`); + } + + let type = (elements[0].getAttribute('type') || elements[0].nodeName).toLowerCase(); + let values: (boolean | string)[] = (Array.isArray(value) === true) ? value as (boolean | string)[] : [value] as (boolean | string)[]; + + if (type === 'file') { + throw new Error(`Input element of type 'file' is not supported.`); + } + + for (let element of elements) { + try { + element.focus(); + element.dispatchEvent(new Event('focus')); + } catch (error) { + } + + if (type === 'select') { + element.value = undefined; + + for (let index of ['value', 'label'] as ['value', 'label']) { + if (result[key].length > 0) { + break; + } + + for (let option of Array.from((element as unknown as HTMLSelectElement).options)) { + option.selected = values.includes(option[index]); + + if (option.selected === true) { + result[key].push(option.value); + + if (element.multiple !== true) { + break; + } + } + } + } + } else if (type === 'checkbox' || type === 'radio') { + element.checked = (value === true) || values.includes(element.value); + + if (element.checked === true) { + result[key].push(element.value); + } + } else if (typeof value === 'string') { + if (element.isContentEditable === true) { + result[key].push(element.textContent = value); + } else { + result[key].push(element.value = value); + } 
+ } + + for (let trigger of ['input', 'change']) { + element.dispatchEvent(new Event(trigger, { 'bubbles': true })); + } + + try { + element.blur(); + element.dispatchEvent(new Event('blur')); + } catch (error) { + } + + if (type === 'checkbox' || type === 'radio') { + break; + } + } + } + + return result; + }; + + return this.evaluate(callback as unknown as EvaluateFunc<[ElementHandle, T]>, data) as any; +}; + +Super.prototype.getInnerHTML = function () { + return this.evaluate((node: Element) => { + return (node as HTMLElement).innerHTML; + }); +}; + +Super.prototype.getInnerText = function () { + return this.evaluate((node: Element) => { + return (node as HTMLElement).innerText; + }); +}; + +Super.prototype.number = function (decimal: string = '.', property: any) { + let callback = (node: any, decimal: string, property: any) => { + let data = (node[property] as unknown) as string; + + if (typeof data === 'string') { + decimal = decimal ?? '.'; + + if (typeof decimal === 'string') { + decimal = decimal.replace(/[.]/g, '\\$&'); + } + + let matches = data.match(/((?:[-+]|\b)[0-9]+(?:[ ,.'`´]*[0-9]+)*)\b/g); + + if (matches != null) { + return matches.map((value) => parseFloat(value.replace(new RegExp(`[^-+0-9${decimal}]+`, 'g'), '').replace(decimal, '.'))); + } + } + + return null; + }; + + return this.evaluate(callback, decimal, property as any); +}; + +Super.prototype.selectByLabel = function (...values: string[]) { + for (let value of values) { + console.assert(typeof value === 'string', `Values must be strings. Found value '${value}' of type '${typeof value}'.`); + } + + let callback = (node: HTMLSelectElement, values: string[]) => { + if (node.nodeName.toLowerCase() !== 'select') { + throw new Error('Element is not a