Skip to content

Commit 44dba5e

Browse files
feat: upgrade domhandler to 3.0.0 and htmlparser to 4.0.0
domhandler 2.4.2 → 3.0.0 htmlparser2 3.10.1 → 4.0.0 domhandler: https://github.com/fb55/domhandler/tree/v3.0.0 htmlparser2: https://github.com/fb55/htmlparser2/tree/v4.0.0 Refactored `utilities.formatDOM` (client parser) to use domhandler `DataNode` and `Element` classes. It comes with additional properties (e.g., `startIndex` and `endIndex`) and getters: https://github.com/fb55/domhandler/blob/v3.0.0/src/node.ts
1 parent 0207ee2 commit 44dba5e

File tree

10 files changed

+97
-101
lines changed

10 files changed

+97
-101
lines changed

karma.conf.js

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ module.exports = config => {
1313
files: [
1414
'dist/htmlparser2.js',
1515
'lib/*.js',
16+
'node_modules/domhandler/lib/node.js',
1617
'test/cases/html.js',
1718
'test/client/*.js',
1819
'test/helpers/*.js'
@@ -26,6 +27,7 @@ module.exports = config => {
2627
preprocessors: {
2728
'dist/**/*.js': ['commonjs'],
2829
'lib/**/*.js': ['commonjs'],
30+
'node_modules/domhandler/lib/node.js': ['commonjs'],
2931
'test/**/*.js': ['commonjs']
3032
},
3133

lib/html-to-dom-client.d.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
// TypeScript Version: 4.1
22

3-
import { DomElement } from 'domhandler';
3+
import { DataNode, Element } from 'domhandler';
44

55
/**
66
* Parses HTML string to DOM nodes in browser.
77
*
88
* @param html - HTML markup.
99
* @return - DOM elements.
1010
*/
11-
export default function HTMLDOMParser(html: string): DomElement[];
11+
export default function HTMLDOMParser(html: string): Array<DataNode | Element>;

lib/html-to-dom-server.d.ts

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,18 @@
11
// TypeScript Version: 4.1
22

3-
import { DomHandlerOptions, DomElement } from 'domhandler';
3+
import { DataNode, DomHandlerOptions, Element } from 'domhandler';
44

55
/**
66
* Parses HTML string to DOM nodes in Node.js.
77
*
88
* This is the same method as `require('htmlparser2').parseDOM`
9-
* https://github.com/fb55/htmlparser2/blob/v3.9.1/lib/index.js#L39-L43
9+
* https://github.com/fb55/htmlparser2/blob/v4.0.0/src/index.ts#L18-L22
1010
*
1111
* @param html - HTML markup.
12-
* @param options - Parser options (https://github.com/fb55/domhandler/tree/v2.4.2#readme).
12+
* @param options - Parser options (https://github.com/fb55/domhandler/tree/v3.0.0#readme).
1313
* @return - DOM elements.
1414
*/
1515
export default function HTMLDOMParser(
1616
html: string,
1717
options?: DomHandlerOptions
18-
): DomElement[];
18+
): Array<DataNode | Element>;

lib/html-to-dom-server.js

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,15 @@
1-
var Parser = require('htmlparser2/lib/Parser');
2-
var DomHandler = require('domhandler');
1+
var Parser = require('htmlparser2/lib/Parser').Parser;
2+
var DomHandler = require('domhandler').DomHandler;
33

44
/**
55
* Parses HTML string to DOM nodes in Node.js.
66
*
77
* This is the same method as `require('htmlparser2').parseDOM`
8-
* https://github.com/fb55/htmlparser2/blob/v3.9.1/lib/index.js#L39-L43
8+
* https://github.com/fb55/htmlparser2/blob/v4.0.0/src/index.ts#L18-L22
99
*
10-
* @param {String} html - HTML markup.
11-
* @param {Object} [options] - Parser options (https://github.com/fb55/domhandler/tree/v2.4.2#readme).
12-
* @return {DomElement[]} - DOM elements.
10+
* @param {string} html - HTML markup.
11+
* @param {DomHandlerOptions} [options] - Parser options (https://github.com/fb55/domhandler/tree/v3.0.0#readme).
12+
* @return {DomElement[]} - DOM elements.
1313
*/
1414
function HTMLDOMParser(html, options) {
1515
if (typeof html !== 'string') {
@@ -20,7 +20,7 @@ function HTMLDOMParser(html, options) {
2020
return [];
2121
}
2222

23-
var handler = new DomHandler(options);
23+
var handler = new DomHandler(undefined, options);
2424
new Parser(handler, options).end(html);
2525
return handler.dom;
2626
}

lib/utilities.d.ts

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
// TypeScript Version: 4.1
22

3-
import { DomElement } from 'domhandler';
3+
import { DataNode, Element } from 'domhandler';
44

55
/**
66
* Formats DOM attributes to a hash map.
@@ -22,9 +22,9 @@ export function formatAttributes(
2222
*/
2323
export function formatDOM(
2424
nodes: NodeList,
25-
parentNode?: DomElement,
25+
parentNode?: DataNode | Element,
2626
directive?: string
27-
): DomElement[];
27+
): Array<DataNode | Element>;
2828

2929
/**
3030
* Detects if browser is Internet Explorer.

lib/utilities.js

Lines changed: 53 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,15 @@
1-
var CASE_SENSITIVE_TAG_NAMES = require('./constants').CASE_SENSITIVE_TAG_NAMES;
1+
var constants = require('./constants');
2+
var domhandler = require('domhandler/lib/node');
3+
4+
var CASE_SENSITIVE_TAG_NAMES = constants.CASE_SENSITIVE_TAG_NAMES;
5+
6+
var Element = domhandler.Element;
7+
var DataNode = domhandler.DataNode;
8+
var ProcessingInstruction = domhandler.ProcessingInstruction;
29

310
var caseSensitiveTagNamesMap = {};
411
var tagName;
12+
513
for (var i = 0, len = CASE_SENSITIVE_TAG_NAMES.length; i < len; i++) {
614
tagName = CASE_SENSITIVE_TAG_NAMES[i];
715
caseSensitiveTagNamesMap[tagName.toLowerCase()] = tagName;
@@ -53,91 +61,71 @@ function formatTagName(tagName) {
5361
/**
5462
* Formats the browser DOM nodes to mimic the output of `htmlparser2.parseDOM()`.
5563
*
56-
* @param {NodeList} nodes - DOM nodes.
57-
* @param {object} [parentNode] - Formatted parent node.
58-
* @param {string} [directive] - Directive.
59-
* @return {DomElement[]} - Formatted DOM object.
64+
* @param {NodeList} nodes - DOM nodes.
65+
* @param {DataNode|Element} [parentNode] - Formatted parent node.
66+
* @param {string} [directive] - Directive.
67+
* @return {Array<DomNode|Element>} - Formatted DOM object.
6068
*/
61-
function formatDOM(nodes, parentNode, directive) {
69+
function formatDOM(domNodes, parentNode, directive) {
6270
parentNode = parentNode || null;
6371

64-
var result = [];
72+
var domNode;
6573
var node;
6674
var prevNode;
67-
var nodeObj;
68-
69-
// `NodeList` is array-like
70-
for (var i = 0, len = nodes.length; i < len; i++) {
71-
node = nodes[i];
72-
// reset
73-
nodeObj = {
74-
next: null,
75-
prev: result[i - 1] || null,
76-
parent: parentNode
77-
};
78-
79-
// set the next node for the previous node (if applicable)
80-
prevNode = result[i - 1];
81-
if (prevNode) {
82-
prevNode.next = nodeObj;
83-
}
75+
var output = [];
8476

85-
// set the node name if it's not "#text" or "#comment"
86-
// e.g., "div"
87-
if (node.nodeName[0] !== '#') {
88-
nodeObj.name = formatTagName(node.nodeName);
89-
// also, nodes of type "tag" have "attribs"
90-
nodeObj.attribs = {}; // default
91-
if (node.attributes && node.attributes.length) {
92-
nodeObj.attribs = formatAttributes(node.attributes);
93-
}
94-
}
77+
for (var i = 0, len = domNodes.length; i < len; i++) {
78+
domNode = domNodes[i];
9579

96-
// set the node type
97-
// e.g., "tag"
98-
switch (node.nodeType) {
99-
// 1 = element
80+
// set the node data given the type
81+
switch (domNode.nodeType) {
10082
case 1:
101-
if (nodeObj.name === 'script' || nodeObj.name === 'style') {
102-
nodeObj.type = nodeObj.name;
103-
} else {
104-
nodeObj.type = 'tag';
105-
}
106-
// recursively format the children
107-
nodeObj.children = formatDOM(node.childNodes, nodeObj);
83+
// script, style, or tag
84+
node = new Element(
85+
formatTagName(domNode.nodeName),
86+
formatAttributes(domNode.attributes)
87+
);
88+
node.children = formatDOM(domNode.childNodes, node);
10889
break;
109-
// 2 = attribute
110-
// 3 = text
90+
11191
case 3:
112-
nodeObj.type = 'text';
113-
nodeObj.data = node.nodeValue;
92+
node = new DataNode('text', domNode.nodeValue);
11493
break;
115-
// 8 = comment
94+
11695
case 8:
117-
nodeObj.type = 'comment';
118-
nodeObj.data = node.nodeValue;
96+
node = new DataNode('comment', domNode.nodeValue);
11997
break;
12098
}
12199

122-
result.push(nodeObj);
100+
// set next for previous node
101+
prevNode = output[i - 1] || null;
102+
if (prevNode) {
103+
prevNode.next = node;
104+
}
105+
106+
// set properties for current node
107+
node.parent = parentNode;
108+
node.prev = prevNode;
109+
node.next = null;
110+
111+
output.push(node);
123112
}
124113

125114
if (directive) {
126-
result.unshift({
127-
name: directive.substring(0, directive.indexOf(' ')).toLowerCase(),
128-
data: directive,
129-
type: 'directive',
130-
next: result[0] ? result[0] : null,
131-
prev: null,
132-
parent: parentNode
133-
});
134-
135-
if (result[1]) {
136-
result[1].prev = result[0];
115+
node = new ProcessingInstruction(
116+
directive.substring(0, directive.indexOf(' ')).toLowerCase(),
117+
directive
118+
);
119+
node.next = output[0] || null;
120+
node.parent = parentNode;
121+
output.unshift(node);
122+
123+
if (output[1]) {
124+
output[1].prev = output[0];
137125
}
138126
}
139127

140-
return result;
128+
return output;
141129
}
142130

143131
/**

package.json

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -34,9 +34,8 @@
3434
"pojo"
3535
],
3636
"dependencies": {
37-
"@types/domhandler": "2.4.1",
38-
"domhandler": "2.4.2",
39-
"htmlparser2": "3.10.1"
37+
"domhandler": "3.0.0",
38+
"htmlparser2": "4.0.0"
4039
},
4140
"devDependencies": {
4241
"@commitlint/cli": "^11.0.0",

test/helpers/run-tests.js

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
var isKarma =
2+
typeof window === 'object' && typeof window.__karma__ === 'object';
3+
14
/**
25
* Runs tests.
36
*
@@ -18,6 +21,13 @@ function runTests(assert, actualParser, expectedParser, testCases) {
1821
var actualOutput = actualParser(testCase.data, parserOptions);
1922
var expectedOutput = expectedParser(testCase.data, parserOptions);
2023

24+
// use `JSON.decycle` since `assert.deepEqual` fails
25+
// when instance types are different in the browser
26+
if (isKarma) {
27+
actualOutput = JSON.decycle(actualOutput);
28+
expectedOutput = JSON.decycle(expectedOutput);
29+
}
30+
2131
assert.deepEqual(actualOutput, expectedOutput);
2232
});
2333
});

test/server/index.js

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,8 @@ const html = '<html>';
3131

3232
describe('server parser', () => {
3333
// before
34-
mock('htmlparser2/lib/Parser', Parser);
35-
mock('domhandler', DomHandler);
34+
mock('htmlparser2/lib/Parser', { Parser });
35+
mock('domhandler', { DomHandler });
3636
const parse = require('../..');
3737

3838
it('calls `DomHandler` and `Parser`', () => {
@@ -45,7 +45,7 @@ describe('server parser', () => {
4545
it('passes options to `DomHandler` and `Parser`', () => {
4646
const options = { decodeEntities: true };
4747
parse(html, options);
48-
expect(DomHandler.calledWith(options)).to.equal(true);
48+
expect(DomHandler.calledWith(undefined, options)).to.equal(true);
4949
expect(Parser.calledWith(DomHandler, options));
5050
});
5151

test/types/index.test.ts

Lines changed: 11 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,16 @@
1-
import parseDOM from 'html-dom-parser';
1+
import parse from 'html-dom-parser';
22

3-
// $ExpectType DomElement[]
4-
parseDOM('<div>text</div>');
3+
// $ExpectType (DataNode | Element)[]
4+
parse('<div>text</div>');
55

6-
// $ExpectType DomElement[]
7-
parseDOM('<div>text</div>', { normalizeWhitespace: true });
6+
// $ExpectType (DataNode | Element)[]
7+
parse('<div>text</div>', { normalizeWhitespace: true });
88

9-
// $ExpectType DomElement[]
10-
parseDOM('<div>text</div>', { withDomLvl1: true });
9+
// $ExpectType (DataNode | Element)[]
10+
parse('<div>text</div>', { withStartIndices: true });
1111

12-
// $ExpectType DomElement[]
13-
parseDOM('<div>text</div>', { withStartIndices: true });
12+
// $ExpectType (DataNode | Element)[]
13+
parse('<div>text</div>', { withEndIndices: true });
1414

15-
// $ExpectType DomElement[]
16-
parseDOM('<div>text</div>', { withEndIndices: true });
17-
18-
// $ExpectType DomElement[]
19-
parseDOM('');
15+
// $ExpectType (DataNode | Element)[]
16+
parse('');

0 commit comments

Comments
(0)