diff --git a/package.json b/package.json
index 770c4bf..bf8cc68 100644
--- a/package.json
+++ b/package.json
@@ -13,7 +13,8 @@
     "docs": "npx jsdoc2md -c .jsdoc.json --files 'src/*.js' > docs/API.md",
     "semantic-release": "semantic-release",
     "semantic-release-dry": "semantic-release --dry-run --branches $CI_BRANCH 1.x main",
-    "prepare": "husky install"
+    "prepare": "husky install",
+    "generate-bloom-filter": "node test/generateAcquisitionBloomFilters.js"
   },
   "repository": {
     "type": "git",
diff --git a/src/index.js b/src/index.js
index 96f6733..2c73368 100644
--- a/src/index.js
+++ b/src/index.js
@@ -11,10 +11,18 @@
  */
 const { sampleRUM } = window.hlx.rum;
 
+/**
+ * Sums `charCodeAt(0)` over `Array.from(string)`, seeded with 1, modulo `modulo`.
+ * @param {string} string - text to hash
+ * @param {number} modulo - divisor applied to the seeded sum
+ * @returns {number} remainder of the seeded sum divided by `modulo`
+ */
+const basicHash = (string, modulo) => Array.from(string)
+  .map((a) => a.charCodeAt(0))
+  .reduce((a, b) => a + b, 1) % modulo;
+
 const fflags = {
-  has: (flag) => fflags[flag].indexOf(Array.from(window.origin)
-    .map((a) => a.charCodeAt(0))
-    .reduce((a, b) => a + b, 1) % 1371) !== -1,
+  has: (flag) => fflags[flag].indexOf(basicHash(window.origin, 1371)) !== -1,
   enabled: (flag, callback) => fflags.has(flag) && callback(),
   disabled: (flag, callback) => !fflags.has(flag) && callback(),
   onetrust: [543, 770, 1136],
@@ -269,3 +277,52 @@ fflags.enabled('email', () => {
     params.filter((param) => regex.test(param)).forEach((param) => sampleRUM('email', { source: network, target: param }));
   });
 });
+
+// acquisition checkpoint
+(() => {
+  // lower-case the value and strip EVERY character that is not an ASCII letter or digit
+  const sanitize = (str) => (str || '').toLowerCase().replace(/[^a-zA-Z0-9]/g, '');
+  // expand a hex string into its bit string, four bits per hex digit
+  const toBinary = (s) => Array.from(s, (c) => parseInt(c, 16).toString(2).padStart(4, '0')).join('');
+  const moduli = [239, 241, 251]; // prime numbers smaller than 256
+  const knownVendors = toBinary('fbdef75ff9f4dedbfdeaba8f21e7884aebf67cfde6eefeea3b8ff32c6fb68a40'); // known vendors bloom filter
+  const categories = {
+    affiliate: ['aff', 'affiliate', 'affiliatemarketing'],
+    audio: ['spotify'],
+    brand: ['brand'],
+    display: ['advertorial', 'banner', 'cpa', 'cpc', 'cpm', 'cpv', 'discover', 'display', 'fbads', 'goppc', 'highimpact', 'inred', 'nps', 'paid', 'paiddisplay', 'placement', 'post', 'poster', 'pp', 'ppc'],
+    email: ['em', 'email', 'mail', 'newsletter'],
+    local: ['yext'],
+    owned: ['owned'],
+    qr: ['qr', 'qrcode'],
+    search: ['direct', 'google', 'googleflights', 'paidsearch', 'paidsearchnb', 'sea', 'sem'],
+    sms: ['sms'],
+    social: ['facebook', 'gnews', 'instagramfeed', 'instagramreels', 'instagramstories', 'line', 'linkedin', 'metasearch', 'organicsocialown', 'paidsocial', 'social', 'sociallinkedin', 'socialpaid'],
+    video: ['native', 'paidvideo', 'pvid', 'video', 'youtube'],
+    web: ['webapp'],
+  };
+  const sources = {
+    paid: ['affiliate', 'audio', 'display', 'local', 'search', 'social', 'video'],
+    owned: ['brand', 'email', 'owned', 'qr', 'sms', 'web'],
+  };
+  // these 'vendors' appear differently in the utmsource field. They are mapped to a single value:
+  // patterns are unanchored and tested in order, so the first matching entry wins
+  const vendorMappings = [
+    { regex: /newsshowcase|aci|google|googleads|gads|google-ads|google_search|google_deman|aw|adwords|dv360|gdn|doubleclick|dbm|gmb/i, result: 'google' },
+    { regex: /instagram|ig/i, result: 'instagram' },
+    { regex: /face|fb|meta/i, result: 'facebook' },
+    { regex: /email/i, result: 'email' },
+    { regex: /bing/i, result: 'bing' },
+    { regex: /amazon|ctv/i, result: 'amazon' },
+    { regex: /qr/i, result: 'qrcode' },
+    { regex: /youtube|yt/i, result: 'youtube' },
+  ];
+  const utmMedium = sanitize(new URLSearchParams(window.location.search).get('utm_medium'));
+  const utmSource = sanitize(new URLSearchParams(window.location.search).get('utm_source'));
+  const preVendor = vendorMappings.find(({ regex }) => regex.test(utmSource))?.result || utmSource;
+  const category = Object.keys(categories).find((key) => (categories[key] || []).includes(utmMedium)) || '';
+  const source = Object.keys(sources).find((key) => (sources[key] || []).includes(category)) || '';
+  // the vendor is reported only when every hash position is set in the bloom filter
+  const vendor = moduli.every((modulo) => knownVendors.charAt(basicHash(preVendor, modulo)) === '1') ? preVendor : '';
+  sampleRUM('acquisition', { source: `${source}:${category}:${vendor}` });
+})();
diff --git a/test/generateAcquisitionBloomFilters.js b/test/generateAcquisitionBloomFilters.js
new file mode 100644
index 0000000..af90316
--- /dev/null
+++ b/test/generateAcquisitionBloomFilters.js
@@ -0,0 +1,145 @@
+/*
+ * Copyright 2024 Adobe. All rights reserved.
+ * This file is licensed to you under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License. You may obtain a copy
+ * of the License at http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under
+ * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
+ * OF ANY KIND, either express or implied. See the License for the specific language
+ * governing permissions and limitations under the License.
+ */
+
+// prime moduli below 256; must match the moduli of the acquisition checkpoint in src/index.js
+const moduli = [239, 241, 251];
+
+/**
+ * Sums `charCodeAt(0)` over `Array.from(string)`, seeded with 1, modulo `modulo`.
+ * @param {string} string - text to hash
+ * @param {number} modulo - divisor applied to the seeded sum
+ * @returns {number} remainder of the seeded sum divided by `modulo`
+ */
+const basicHash = (string, modulo) => Array.from(string)
+  .map((a) => a.charCodeAt(0))
+  .reduce((a, b) => a + b, 1) % modulo;
+
+// converts a string of binary digits into its hexadecimal representation
+const binaryToText = (binaryString) => parseInt(binaryString, 2).toString(16);
+
+// known vendors
+const vendors = [
+  'adlocus',
+  'admitadmonetize',
+  'aftership',
+  'amazon',
+  'attentive',
+  'avivid',
+  'baidu',
+  'banner',
+  'bing',
+  'blis',
+  'cheetah',
+  'cj',
+  'clarin',
+  'clm',
+  'criteo',
+  'demandgen',
+  'digidip',
+  'digitalremedycom',
+  'discovery',
+  'display',
+  'eloqua',
+  'email',
+  'eminent',
+  'facebook',
+  'famoussmokeshopinc',
+  'fark',
+  'fashionistatop',
+  'feedotter',
+  'flipboard',
+  'flyer',
+  'geniusmonkey',
+  'giftcardmall',
+  'google',
+  'hotstar',
+  'hrs',
+  'hsemail',
+  'inmobicom',
+  'inred',
+  'insider',
+  'instagram',
+  'integrateddisplay',
+  'internal',
+  'line',
+  'linkbux',
+  'linkedin',
+  'linkinbio',
+  'locationpage',
+  'lveng',
+  'm2trans',
+  'manutd',
+  'marketo',
+  'massiva',
+  'mavenintent',
+  'mediamond',
+  'mentionme',
+  'microsoft',
+  'native',
+  'newsletter',
+  'nexus',
+  'openweb',
+  'optum',
+  'outbrain',
+  'outlook',
+  'partner',
+  'partnerstudentbeanscom',
+  'petcademy',
+  'pinterest',
+  'pmax',
+  'programmatic',
+  'programmaticgdn',
+  'pushly',
+  'qrcode',
+  'reddit',
+  'redone',
+  'retailercode',
+  'seznam',
+  'shopfully',
+  'silverpop',
+  'sky',
+  'skyscanner',
+  'snapchat',
+  'spotify',
+  'substack',
+  'taboola',
+  'teads',
+  'thetradedesk',
+  'tiktok',
+  'tradedesk',
+  'tradetracker',
+  'twitter',
+  'web',
+  'yahoo',
+  'yandex',
+  'yext',
+  'yieldkit',
+  'youtube',
+];
+
+// Initialize 256 chars long array filled with initial zeros
+const bloomFilter = new Array(256).fill(0);
+
+// Insert each vendor into the Bloom filter
+vendors.forEach((vendor) => {
+  moduli.forEach((modulo) => {
+    const hash = basicHash(vendor, modulo);
+    bloomFilter[hash] = 1;
+  });
+});
+
+// group the bits in fours and encode each group as one hex digit
+const f = bloomFilter.reduce((acc, _, index) => (index % 4 === 0 ? [...acc, bloomFilter.slice(index, index + 4).join('')] : acc), [])
+  .map((binaryChar) => binaryToText(binaryChar))
+  .join('');
+
+console.log(`Bloom Filter: ${f}`);