Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37,245 changes: 22,835 additions & 14,410 deletions package-lock.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion packages/spacecat-shared-data-access/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
"access": "public"
},
"dependencies": {
"@adobe/spacecat-shared-utils": "1.81.1",
"@adobe/spacecat-shared-utils": "1.85.2",
"@aws-sdk/client-dynamodb": "3.940.0",
"@aws-sdk/client-s3": "^3.940.0",
"@aws-sdk/lib-dynamodb": "3.940.0",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ import {
isNonEmptyObject,
isNumber,
isObject,
isValidUrl,
isValidBaseUrl,
} from '@adobe/spacecat-shared-utils';

import SchemaBuilder from '../base/schema.builder.js';
Expand Down Expand Up @@ -83,7 +83,7 @@ const schema = new SchemaBuilder(ImportJob, ImportJobCollection)
.addAttribute('baseURL', {
type: 'string',
required: true,
validate: (value) => isValidUrl(value),
validate: (value) => isValidBaseUrl(value),
})
.addAttribute('duration', {
type: 'number',
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ import {
isIsoDate,
isNumber,
isObject,
isValidUrl,
isValidBaseUrl,
isString,
} from '@adobe/spacecat-shared-utils';

Expand All @@ -37,7 +37,7 @@ const schema = new SchemaBuilder(ScrapeJob, ScrapeJobCollection)
.addAttribute('baseURL', {
type: 'string',
required: true,
validate: (value) => isValidUrl(value),
validate: (value) => isValidBaseUrl(value),
})
.addAttribute('processingType', {
type: 'string',
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

/* c8 ignore start */

import { isObject, isValidUrl, isValidUUID } from '@adobe/spacecat-shared-utils';
import { isObject, isValidBaseUrl, isValidUUID } from '@adobe/spacecat-shared-utils';

import SchemaBuilder from '../base/schema.builder.js';
import SiteCandidate from './site-candidate.model.js';
Expand All @@ -33,7 +33,7 @@ const schema = new SchemaBuilder(SiteCandidate, SiteCandidateCollection)
.addAttribute('baseURL', {
type: 'string',
required: true,
validate: (value) => isValidUrl(value),
validate: (value) => isValidBaseUrl(value),
})
.addAttribute('hlxConfig', {
type: 'any',
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import {
isNonEmptyObject,
isObject,
isValidUrl,
isValidBaseUrl,
} from '@adobe/spacecat-shared-utils';

import { Config, DEFAULT_CONFIG, validateConfiguration } from './config.js';
Expand Down Expand Up @@ -51,7 +52,7 @@ const schema = new SchemaBuilder(Site, SiteCollection)
.addAttribute('baseURL', {
type: 'string',
required: true,
validate: (value) => isValidUrl(value),
validate: (value) => isValidBaseUrl(value),
})
.addAttribute('name', {
type: 'string',
Expand Down
2 changes: 1 addition & 1 deletion packages/spacecat-shared-google-client/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
"@adobe/fetch": "4.2.3",
"@adobe/helix-universal": "5.3.0",
"@adobe/spacecat-shared-http-utils": "1.19.3",
"@adobe/spacecat-shared-utils": "1.81.1",
"@adobe/spacecat-shared-utils": "1.85.2",
"@aws-sdk/client-secrets-manager": "3.940.0",
"aws-xray-sdk": "3.12.0",
"google-auth-library": "10.5.0",
Expand Down
3 changes: 2 additions & 1 deletion packages/spacecat-shared-google-client/src/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import {
isArray,
isInteger,
isValidDate,
isValidBaseUrl,
isValidUrl,
resolveCustomerSecretsName,
} from '@adobe/spacecat-shared-utils';
Expand Down Expand Up @@ -61,7 +62,7 @@ export default class GoogleClient {
}

static async createFrom(context, baseURL) {
if (!isValidUrl(baseURL)) {
if (!isValidBaseUrl(baseURL)) {
throw new Error('Error creating GoogleClient: Invalid base URL');
}

Expand Down
3 changes: 2 additions & 1 deletion packages/spacecat-shared-utils/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@
"urijs": "1.19.11",
"validator": "^13.15.15",
"world-countries": "5.1.0",
"zod": "^4.1.11"
"zod": "^4.1.11",
"tldts": "7.0.17"
}
}
49 changes: 49 additions & 0 deletions packages/spacecat-shared-utils/src/functions.js
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
*/

import isEmail from 'validator/lib/isEmail.js';
import { parse } from 'tldts';

// Precompile regular expressions
// Matches the `YYYY-MM-DDTHH:mm:ss.sssZ` shape. The separator before the
// milliseconds is escaped so that only a literal "." matches there.
const REGEX_ISO_DATE = /\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{3}Z/;
Expand Down Expand Up @@ -198,6 +199,53 @@ function isValidUrl(urlString) {
}
}

/**
 * Validates whether the given string is a valid base URL.
 *
 * A string without a `scheme://` prefix is validated as if it started with `https://`.
 * Surrounding whitespace is ignored. The URL must be "clean":
 * - protocol is http or https,
 * - no port (including the scheme's default port or an empty `:`),
 * - no `?` or `#` (including empty query strings / fragments),
 * - no userinfo (including an empty `@`),
 * - no `..` or `//` in the path (checked on both the raw and the parsed path),
 * - no control characters,
 * - the host is a registrable domain (not an IP address) whose labels are
 *   1-63 characters of letters, digits and inner hyphens.
 * Paths are allowed.
 *
 * @param {string} urlString - The string to validate.
 * @returns {boolean} True if the given string validates successfully; false for
 *   any invalid or non-string input (this function never throws).
 */
function isValidBaseUrl(urlString) {
  // Guard explicitly instead of relying on `.trim()` throwing for non-strings.
  if (typeof urlString !== 'string') return false;

  try {
    let url = urlString.trim();

    // reject control characters (LF, CR, etc.)
    if ([...url].some((c) => {
      const code = c.charCodeAt(0);
      return code < 32 || code === 127;
    })) return false;

    // A bare "?" or "#" parses to an empty search/hash, which the parsed-URL
    // check below cannot see, so reject them on the raw input.
    if (url.includes('?') || url.includes('#')) return false;

    const hasProtocol = /^[a-z][a-z0-9+\-.]*:\/\//i.test(url);
    if (!hasProtocol) {
      url = `https://${url}`;
    }

    // Split the raw input into authority and path the way the URL parser does
    // for http(s): extra leading slashes are skipped and "\" counts as "/".
    const afterScheme = url.slice(url.indexOf('://') + 3).replace(/^[/\\]+/, '');
    const pathStart = afterScheme.search(/[/\\]/);
    const rawAuthority = pathStart === -1 ? afterScheme : afterScheme.slice(0, pathStart);
    const rawPath = pathStart === -1 ? '' : afterScheme.slice(pathStart);

    // The URL parser drops default ports (":443" on https, ":80" on http), an
    // empty port and an empty userinfo, so `port`/`username` alone miss them.
    if (/[:@]/.test(rawAuthority)) return false;

    // The URL parser resolves dot-segments ("/a/../b" -> "/b", also "%2e%2e"),
    // so traversal has to be detected before parsing.
    if (rawPath.replace(/%2e/gi, '.').includes('..')) return false;

    const urlObj = new URL(url);

    if (urlObj.protocol !== 'http:' && urlObj.protocol !== 'https:') return false;
    if (urlObj.search || urlObj.hash || urlObj.port) return false;
    if (urlObj.username || urlObj.password) return false;
    if (urlObj.pathname.includes('..') || urlObj.pathname.includes('//')) return false;

    // ensure the hostname is a valid registrable domain and not an IP
    const domain = parse(urlObj.hostname, { allowPrivateDomains: true });
    if (!domain.domain || domain.isIp) return false;
    if (!domain.isIcann && !domain.isPrivate) return false;

    // validate each label for length and allowed characters
    // (non-ASCII hosts are already punycode-encoded in `urlObj.hostname`)
    for (const label of urlObj.hostname.split('.')) {
      if (label.length === 0 || label.length > 63) return false;
      if (!/^[a-z0-9](?:[a-z0-9-]*[a-z0-9])?$/i.test(label)) return false;
    }

    return true;
  } catch {
    // `new URL` throws on malformed input; treat anything unparseable as invalid.
    return false;
  }
}

/**
* Validates whether the given string is a valid UUID.
* @param {string} uuid - The string to validate.
Expand Down Expand Up @@ -335,6 +383,7 @@ export {
isValidDate,
isValidEmail,
isValidUrl,
isValidBaseUrl,
isValidUUID,
isValidIMSOrgId,
isValidHelixPreviewUrl,
Expand Down
2 changes: 2 additions & 0 deletions packages/spacecat-shared-utils/src/index.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ export function toBoolean(value: unknown): boolean;

export function isValidUrl(urlString: string): boolean;

export function isValidBaseUrl(urlString: string): boolean;

export function isValidHelixPreviewUrl(urlString: string): boolean;

export function isValidUUID(uuid: string): boolean;
Expand Down
1 change: 1 addition & 0 deletions packages/spacecat-shared-utils/src/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ export {
isValidDate,
isValidEmail,
isValidUrl,
isValidBaseUrl,
isValidUUID,
isValidIMSOrgId,
isValidHelixPreviewUrl,
Expand Down
183 changes: 183 additions & 0 deletions packages/spacecat-shared-utils/test/functions.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ import {
isValidDate,
isValidEmail,
isValidUrl,
isValidBaseUrl,
isValidUUID,
isValidIMSOrgId,
isValidHelixPreviewUrl,
Expand Down Expand Up @@ -303,6 +304,188 @@ describe('Shared functions', () => {
});
});

describe('isValidBaseUrl', () => {
  /**
   * Asserts that isValidBaseUrl returns `expected` for every input.
   * The offending input is included in the assertion message so that a
   * failure inside a table of cases is immediately identifiable.
   */
  const expectAll = (inputs, expected) => {
    for (const input of inputs) {
      expect(isValidBaseUrl(input), `input: ${String(input)}`).to.equal(expected);
    }
  };

  it('returns false for non-string inputs', () => {
    expectAll([
      null,
      undefined,
      1234,
      true,
      false,
      {},
      [],
      Symbol('test'),
    ], false);
  });

  it('returns false for empty or whitespace-only strings', () => {
    expectAll([
      '',
      ' ',
      '\t',
    ], false);
  });

  it('returns false for strings with control characters', () => {
    expectAll([
      'example.com\u0000',
      'example.com\u001F',
      'example.com\u007F',
      'example\u0000.com',
      'https://example\u0000.com',
    ], false);
  });

  it('returns false for URLs with non-http/https protocols', () => {
    expectAll([
      'ftp://example.com',
      'file://example.com',
      'mailto:test@example.com',
      // eslint-disable-next-line no-script-url
      'javascript:alert(1)',
      'data:text/html,<script>alert(1)</script>',
      'ws://example.com',
      'wss://example.com',
    ], false);
  });

  it('returns false for URLs with invalid paths', () => {
    expectAll([
      'example.com/asd..dsa',
      'https://example.com/..asd',
      'https://example.com/.../asd',
      'http://example.com//',
      'http://example.com/asd//dsa',
    ], false);
  });

  it('returns false for URLs with query parameters', () => {
    expectAll([
      'https://example.com?param=value',
      'https://example.com?foo=bar&baz=qux',
      'example.com?param=value',
    ], false);
  });

  it('returns false for URLs with hash fragments', () => {
    expectAll([
      'https://example.com#section',
      'https://example.com#anchor',
      'example.com#section',
    ], false);
  });

  it('returns false for URLs with explicit ports', () => {
    expectAll([
      'https://example.com:0',
      'https://example.com:80',
      'http://example.com:8080',
      'http://example.com:8',
      'example.com:0',
      'example.com:80',
      'example.com:8080',
    ], false);
  });

  it('returns false for IP addresses', () => {
    expectAll([
      'https://192.168.1.1',
      'https://127.0.0.1',
      'http://255.255.255.255',
      '192.168.1.1',
      '255.255.255.256',
    ], false);
  });

  // Merged with the former second test of the same title so that every
  // test title in this suite is unique in the report.
  it('returns false for invalid domain labels', () => {
    expectAll([
      'https://-example.com',
      'https://example-.com',
      '-example.com',
      'example-.com',
      'https://example_.com',
      'https://example@.com',
      'https://.example.com',
      'https://example..com',
      'https://examp#le..com',
      '.example.com',
      'example..com',
      'https://example.-ab.com',
    ], false);
  });

  it('returns false for domain labels exceeding 63 characters', () => {
    const longLabel = 'a'.repeat(64);
    expectAll([
      `https://${longLabel}.com`,
      `${longLabel}.com`,
    ], false);
  });

  it('returns false for invalid domains (not ICANN or private)', () => {
    expectAll([
      'https://invalid..domain',
      'invalid..domain',
    ], false);
  });

  it('returns false for malformed URLs that cause URL constructor to throw', () => {
    expectAll([
      'https://',
      'https:////',
      '://example.com',
    ], false);
  });

  it('returns true for valid https URLs', () => {
    expectAll([
      'https://abc.xyz',
      'https://example.com/path',
      'https://subdomain.example.com/path/abc',
      'https://test.co.uk/path/',
      'https://exam-ple.org',
    ], true);
  });

  it('returns true for valid domains without protocol (auto-prepends https://)', () => {
    expectAll([
      'example.com',
      'www.example.com',
      'subdomain.example.com',
      'abc.xyz',
      'test.co.uk',
      'example.org',
    ], true);
  });

  it('handles case-insensitive protocol matching', () => {
    expectAll([
      'HTTPS://example.com',
      'Https://example.com',
      'https://EXAMPLE.COM',
      'https://EXAmpLE.COM',
    ], true);
  });

  it('handles URLs with leading/trailing whitespace', () => {
    expectAll([
      ' https://example.com ',
      ' example.com ',
    ], true);
  });

  it('returns true for valid domain labels at maximum length', () => {
    const maxLengthLabel = 'a'.repeat(63);
    expectAll([
      `https://${maxLengthLabel}.com`,
      `${maxLengthLabel}.com`,
    ], true);
  });

  it('returns true for domains with hyphens in labels', () => {
    expectAll([
      'https://sub-domain.example.com',
      'https://my-site.example.org',
      'sub-domain.example.com',
    ], true);
  });

  it('returns true for domains with numbers in labels', () => {
    expectAll([
      'https://test123.example.com',
      'test123.example.com',
    ], true);
  });

  // Paths are allowed, so this is about query/hash/port directly after the host.
  it('returns false for URLs with a query, hash fragment, or port after the host', () => {
    expectAll([
      'https://example.com?query=value',
      'https://example.com#hash',
      'https://example.com:8080',
    ], false);
  });

  it('returns true for non ascii characters', () => {
    expectAll([
      'https://тест.example.com',
      'https://тест.мкд',
      'https://परीक्षा.संगठन',
    ], true);
  });

  it('returns false for invalid TLDs', () => {
    expectAll([
      'https://example.c',
      'https://example.qq',
    ], false);
  });
});

describe('isValidIMSOrgId', () => {
it('returns false for invalid IMS Org Id', async () => {
expect(isValidIMSOrgId('invalid-ims-org-id')).to.be.false;
Expand Down
Loading
Loading