Skip to main content

WebPageData

Nosible API Client


Nosible API Client / WebPageData

Class: WebPageData

Defined in: src/scrape/webPageData.ts:16

Represents scraped data for a single web page and provides helpers to work with ordered snippets and serialization.

Constructors

Constructor

new WebPageData(client, res): WebPageData

Defined in: src/scrape/webPageData.ts:32

Creates a new WebPageData instance from a validated scrape response.

Parameters

client

NosibleClient

Nosible client used by the consumer of this instance

res

Validated scrape response payload

request

{ raw_url: string; url: string; hash: string; geo: string; proxy: string; scheme: string; netloc: string; prefix: string; domain: string; suffix: string; path: string; query: string; fragment: string; query_allowed: Record<string, never>; query_blocked: Record<string, never>; } = ...

request.raw_url

string = ...

request.url

string = ...

request.hash

string = ...

request.geo

string = ...

request.proxy

string = ...

request.scheme

string = ...

request.netloc

string = ...

request.prefix

string = ...

request.domain

string = ...

request.suffix

string = ...

request.path

string = ...

request.query

string = ...

request.fragment

string = ...

request.query_allowed

Record<string, never> = querySchema

request.query_blocked

Record<string, never> = querySchema

page

{ title: string; description: string; author: string; published: Date; visited: Date; certain?: boolean; } = ...

page.title

string = ...

page.description

string = ...

page.author

string = ...

page.published

Date = ...

page.visited

Date = ...

page.certain?

boolean = ...

statistics

{ snippets: number; sentences: number; words: number; characters: number; images: number; videos: number; audio: number; tables: number; lists: number; blocks: number; links: number; files: number; } = ...

statistics.snippets

number = ...

statistics.sentences

number = ...

statistics.words

number = ...

statistics.characters

number = ...

statistics.images

number = ...

statistics.videos

number = ...

statistics.audio

number = ...

statistics.tables

number = ...

statistics.lists

number = ...

statistics.blocks

number = ...

statistics.links

number = ...

statistics.files

number = ...

languages

Record<string, number> = ...

snippets

Record<string, { url_hash: string; snippet_hash: string; prev_snippet_hash: string | null; next_snippet_hash: string | null; content: string; words?: string; language: string; statistics?: { sentences: number; words: number; characters: number; links?: number; images?: number; }; links?: Record<string, string>; images?: string[]; }> = ...

full_text

string = ...

metadata

Record<string, any> = ...

structured

object[] = ...

url_tree

Record<string, any> = ...

Returns

WebPageData

Properties

data

data: object

Defined in: src/scrape/webPageData.ts:18

request

request: object

request.raw_url

raw_url: string

request.url

url: string

request.hash

hash: string

request.geo

geo: string

request.proxy

proxy: string

request.scheme

scheme: string

request.netloc

netloc: string

request.prefix

prefix: string

request.domain

domain: string

request.suffix

suffix: string

request.path

path: string

request.query

query: string

request.fragment

fragment: string

request.query_allowed

query_allowed: Record<string, never> = querySchema

request.query_blocked

query_blocked: Record<string, never> = querySchema

page

page: object

page.title

title: string

page.description

description: string

page.author

author: string

page.published

published: Date

page.visited

visited: Date

page.certain?

optional certain: boolean

statistics

statistics: object

statistics.snippets

snippets: number

statistics.sentences

sentences: number

statistics.words

words: number

statistics.characters

characters: number

statistics.images

images: number

statistics.videos

videos: number

statistics.audio

audio: number

statistics.tables

tables: number

statistics.lists

lists: number

statistics.blocks

blocks: number

statistics.links

links: number

statistics.files

files: number

languages

languages: Record<string, number>

snippets

snippets: Record<string, { url_hash: string; snippet_hash: string; prev_snippet_hash: string | null; next_snippet_hash: string | null; content: string; words?: string; language: string; statistics?: { sentences: number; words: number; characters: number; links?: number; images?: number; }; links?: Record<string, string>; images?: string[]; }>

full_text

full_text: string

metadata

metadata: Record<string, any>

structured

structured: object[]

url_tree

url_tree: Record<string, any>


page

page: object

Defined in: src/scrape/webPageData.ts:20

title

title: string

description

description: string

author

author: string

published

published: Date

visited

visited: Date

certain?

optional certain: boolean


fullText

fullText: string

Defined in: src/scrape/webPageData.ts:21


metadata

metadata: Record<string, any>

Defined in: src/scrape/webPageData.ts:22


structured

structured: any[]

Defined in: src/scrape/webPageData.ts:23


urlTree

urlTree: Record<string, any>

Defined in: src/scrape/webPageData.ts:24


statistics

statistics: object

Defined in: src/scrape/webPageData.ts:25

snippets

snippets: number

sentences

sentences: number

words

words: number

characters

characters: number

images

images: number

videos

videos: number

audio

audio: number

tables

tables: number

lists

lists: number

blocks

blocks: number

links

links: number

files

files: number

Methods

fromJson()

static fromJson(client, inputPath): Promise<WebPageData>

Defined in: src/scrape/webPageData.ts:52

Hydrates a WebPageData instance from a JSON file on disk. The file contents are validated with scrapeResponseSchema.

Parameters

client

NosibleClient

Nosible client used by the consumer of this instance

inputPath

string

Absolute or relative path to the input JSON file

Returns

Promise<WebPageData>

A WebPageData instance built from the validated JSON


writeJson()

writeJson(outputPath): Promise<void>

Defined in: src/scrape/webPageData.ts:70

Writes the underlying scrape response to a JSON file.

Parameters

outputPath

string

Output file path for the JSON export

Returns

Promise<void>

A promise that resolves when the file is written


getSnippets()

getSnippets(): Snippet[]

Defined in: src/scrape/webPageData.ts:78

Returns an ordered array of snippets

Returns

Snippet[]

An ordered array of snippets


getSnippet()

getSnippet(hashKey): Snippet | undefined

Defined in: src/scrape/webPageData.ts:87

Returns a snippet by hash key

Parameters

hashKey

string

The hash key of the snippet

Returns

Snippet | undefined

The snippet with the specified hash key, or undefined (per the declared return type) when no snippet matches that key