Commit e026324

sebmarkbage authored and AndyPengc12 committed
[Flight] Optimize Large Strings by Not Escaping Them (facebook#26932)
This introduces a Text row (T) which is essentially a string blob, and refactors the parsing to now happen at the binary level.

```
RowID + ":" + "T" + ByteLengthInHex + "," + Text
```

Today, we encode all row data in JSON, which conveniently never has newline characters, so we use newline as the line terminator. We can't do that if we pass arbitrary unicode without escaping it. Instead, we pass the byte length (in hexadecimal) in the leading header for this row tag, followed by a comma. We could be clever and use fixed or variable-length binary integers for the row ID and length, but it's not worth the more difficult debuggability, so we keep these human readable in text.

Before this PR, we used to decode the binary stream into UTF-8 strings before parsing them. This is inefficient because sometimes the slices end up having to be copied, so it's better to decode directly into the target format. The follow-up to this is also to add support for binary data, and then we can't assume the entire payload is UTF-8 anyway. So this refactors the parser to parse the rows in binary and then decode the result into UTF-8. It does add some overhead to decoding on a per-row basis, though.

Since we do this, we need to encode the byte length that we want to decode - not the string length. Therefore, this requires clients to receive binary data, which is why I had to delete the string option. It also means that I had to add a way to get the byteLength from a chunk, since chunks are not always binary. For Web streams it's easy since they're always typed arrays. For Node streams it's trickier, so we use the byteLength helper, which may not be very efficient. Might be worth eagerly encoding them to UTF-8 - perhaps only for this case.
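As a rough sketch of the framing described above (illustrative only — `encodeTextRow` is a hypothetical name, not part of this commit), a text row is the hex row ID, a colon, the `T` tag, the hex byte length, a comma, then the raw unescaped UTF-8 bytes:

```js
// Hypothetical illustration of the T row framing; not code from this commit.
function encodeTextRow(id, text) {
  const payload = new TextEncoder().encode(text); // raw UTF-8, no escaping
  // Row ID and byte length stay human-readable hexadecimal for debuggability.
  const header = id.toString(16) + ':T' + payload.byteLength.toString(16) + ',';
  const headerBytes = new TextEncoder().encode(header);
  const row = new Uint8Array(headerBytes.byteLength + payload.byteLength);
  row.set(headerBytes, 0);
  row.set(payload, headerBytes.byteLength);
  return row;
}

// encodeTextRow(5, 'this is 16 bytes') frames as "5:T10,this is 16 bytes"
// (0x10 = 16 bytes), with no trailing newline needed.
```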
1 parent 2cc4128 · commit e026324

File tree: 11 files changed, +267 −42 lines changed


packages/react-client/src/ReactFlightClient.js

Lines changed: 152 additions & 32 deletions
```diff
@@ -52,6 +52,14 @@ export type JSONValue =
   | {+[key: string]: JSONValue}
   | $ReadOnlyArray<JSONValue>;
 
+const ROW_ID = 0;
+const ROW_TAG = 1;
+const ROW_LENGTH = 2;
+const ROW_CHUNK_BY_NEWLINE = 3;
+const ROW_CHUNK_BY_LENGTH = 4;
+
+type RowParserState = 0 | 1 | 2 | 3 | 4;
+
 const PENDING = 'pending';
 const BLOCKED = 'blocked';
 const RESOLVED_MODEL = 'resolved_model';
```
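To make these parser states concrete, here is how a hypothetical text row and a JSON row would walk through them, byte by byte (an illustration, not part of the diff):

```js
// Parsing the bytes of "5:T3,abc" (hypothetical input):
//   '5'           ROW_ID                 accumulate rowID = 0x5
//   ':'           ROW_ID -> ROW_TAG      row ID complete
//   'T'           ROW_TAG -> ROW_LENGTH  text rows carry a byte length
//   '3'           ROW_LENGTH             accumulate rowLength = 0x3
//   ','           ROW_LENGTH -> ROW_CHUNK_BY_LENGTH
//   'a' 'b' 'c'   ROW_CHUNK_BY_LENGTH    buffer until 3 bytes have arrived
//
// A JSON row like '0:{"text":"hi"}\n' instead goes ROW_ID -> ROW_TAG ->
// ROW_CHUNK_BY_NEWLINE ('{' is not an uppercase tag byte, so it is left in
// place as data) and the row ends at the '\n' terminator.
```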
```diff
@@ -165,9 +173,13 @@ export type Response = {
   _bundlerConfig: SSRManifest,
   _callServer: CallServerCallback,
   _chunks: Map<number, SomeChunk<any>>,
-  _partialRow: string,
   _fromJSON: (key: string, value: JSONValue) => any,
   _stringDecoder: StringDecoder,
+  _rowState: RowParserState,
+  _rowID: number, // parts of a row ID parsed so far
+  _rowTag: number, // 0 indicates that we're currently parsing the row ID
+  _rowLength: number, // remaining bytes in the row. 0 indicates that we're looking for a newline.
+  _buffer: Array<Uint8Array>, // chunks received so far as part of this row
 };
 
 function readChunk<T>(chunk: SomeChunk<T>): T {
```
```diff
@@ -276,6 +288,14 @@ function createResolvedModuleChunk<T>(
   return new Chunk(RESOLVED_MODULE, value, null, response);
 }
 
+function createInitializedTextChunk(
+  response: Response,
+  value: string,
+): InitializedChunk<string> {
+  // $FlowFixMe[invalid-constructor] Flow doesn't support functions as constructors
+  return new Chunk(INITIALIZED, value, null, response);
+}
+
 function resolveModelChunk<T>(
   chunk: SomeChunk<T>,
   value: UninitializedModel,
```
```diff
@@ -665,9 +685,13 @@ export function createResponse(
     _bundlerConfig: bundlerConfig,
     _callServer: callServer !== undefined ? callServer : missingCall,
     _chunks: chunks,
-    _partialRow: '',
     _stringDecoder: createStringDecoder(),
     _fromJSON: (null: any),
+    _rowState: 0,
+    _rowID: 0,
+    _rowTag: 0,
+    _rowLength: 0,
+    _buffer: [],
   };
   // Don't inline this call because it causes closure to outline the call above.
   response._fromJSON = createFromJSONCallback(response);
```
```diff
@@ -688,6 +712,13 @@ function resolveModel(
   }
 }
 
+function resolveText(response: Response, id: number, text: string): void {
+  const chunks = response._chunks;
+  // We assume that we always reference large strings after they've been
+  // emitted.
+  chunks.set(id, createInitializedTextChunk(response, text));
+}
+
 function resolveModule(
   response: Response,
   id: number,
```
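resolveText can install an already-initialized chunk because the server emits a large string before any model row that references it, so there is never a pending consumer waiting on it. Illustrative wire ordering (hypothetical ids and reference syntax, not from this diff):

```js
// Hypothetical wire order:
//   5:T1c,...28 bytes of raw text...   <- text row emitted first
//   0:{"text":"$5"}                    <- model row references it afterwards
// By the time the reference is parsed, chunk 5 is already initialized.
```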
```diff
@@ -802,33 +833,40 @@ function resolveHint(
   code: string,
   model: UninitializedModel,
 ): void {
-  const hintModel = parseModel<HintModel>(response, model);
+  const hintModel: HintModel = parseModel(response, model);
   dispatchHint(code, hintModel);
 }
 
-function processFullRow(response: Response, row: string): void {
-  if (row === '') {
-    return;
+function processFullRow(
+  response: Response,
+  id: number,
+  tag: number,
+  buffer: Array<Uint8Array>,
+  lastChunk: string | Uint8Array,
+): void {
+  let row = '';
+  const stringDecoder = response._stringDecoder;
+  for (let i = 0; i < buffer.length; i++) {
+    const chunk = buffer[i];
+    row += readPartialStringChunk(stringDecoder, chunk);
+  }
+  if (typeof lastChunk === 'string') {
+    row += lastChunk;
+  } else {
+    row += readFinalStringChunk(stringDecoder, lastChunk);
   }
-  const colon = row.indexOf(':', 0);
-  const id = parseInt(row.slice(0, colon), 16);
-  const tag = row[colon + 1];
-  // When tags that are not text are added, check them here before
-  // parsing the row as text.
-  // switch (tag) {
-  // }
   switch (tag) {
-    case 'I': {
-      resolveModule(response, id, row.slice(colon + 2));
+    case 73 /* "I" */: {
+      resolveModule(response, id, row);
       return;
     }
-    case 'H': {
-      const code = row[colon + 2];
-      resolveHint(response, code, row.slice(colon + 3));
+    case 72 /* "H" */: {
+      const code = row[0];
+      resolveHint(response, code, row.slice(1));
       return;
     }
-    case 'E': {
-      const errorInfo = JSON.parse(row.slice(colon + 2));
+    case 69 /* "E" */: {
+      const errorInfo = JSON.parse(row);
       if (__DEV__) {
         resolveErrorDev(
           response,
```
```diff
@@ -842,9 +880,13 @@ function processFullRow(response: Response, row: string): void {
       }
       return;
     }
+    case 84 /* "T" */: {
+      resolveText(response, id, row);
+      return;
+    }
     default: {
       // We assume anything else is JSON.
-      resolveModel(response, id, row.slice(colon + 1));
+      resolveModel(response, id, row);
       return;
     }
   }
```
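The tag cases now compare raw byte values rather than one-character strings, because processFullRow receives the tag byte straight from the binary parser. The numeric cases map to the same ASCII letters as before — a quick sanity check (illustrative, not part of the diff):

```js
// The numeric tags are just the ASCII codes of the old string tags.
console.log('I'.charCodeAt(0)); // 73 - module import row
console.log('H'.charCodeAt(0)); // 72 - hint row
console.log('E'.charCodeAt(0)); // 69 - error row
console.log('T'.charCodeAt(0)); // 84 - the new text row
```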
```diff
@@ -854,18 +896,96 @@ export function processBinaryChunk(
   response: Response,
   chunk: Uint8Array,
 ): void {
-  const stringDecoder = response._stringDecoder;
-  let linebreak = chunk.indexOf(10); // newline
-  while (linebreak > -1) {
-    const fullrow =
-      response._partialRow +
-      readFinalStringChunk(stringDecoder, chunk.subarray(0, linebreak));
-    processFullRow(response, fullrow);
-    response._partialRow = '';
-    chunk = chunk.subarray(linebreak + 1);
-    linebreak = chunk.indexOf(10); // newline
+  let i = 0;
+  let rowState = response._rowState;
+  let rowID = response._rowID;
+  let rowTag = response._rowTag;
+  let rowLength = response._rowLength;
+  const buffer = response._buffer;
+  const chunkLength = chunk.length;
+  while (i < chunkLength) {
+    let lastIdx = -1;
+    switch (rowState) {
+      case ROW_ID: {
+        const byte = chunk[i++];
+        if (byte === 58 /* ":" */) {
+          // Finished the rowID, next we'll parse the tag.
+          rowState = ROW_TAG;
+        } else {
+          rowID = (rowID << 4) | (byte > 96 ? byte - 87 : byte - 48);
+        }
+        continue;
+      }
+      case ROW_TAG: {
+        const resolvedRowTag = chunk[i];
+        if (resolvedRowTag === 84 /* "T" */) {
+          rowTag = resolvedRowTag;
+          rowState = ROW_LENGTH;
+          i++;
+        } else if (resolvedRowTag > 64 && resolvedRowTag < 91 /* "A"-"Z" */) {
+          rowTag = resolvedRowTag;
+          rowState = ROW_CHUNK_BY_NEWLINE;
+          i++;
+        } else {
+          rowTag = 0;
+          rowState = ROW_CHUNK_BY_NEWLINE;
+          // This was an unknown tag so it was probably part of the data.
+        }
+        continue;
+      }
+      case ROW_LENGTH: {
+        const byte = chunk[i++];
+        if (byte === 44 /* "," */) {
+          // Finished the rowLength, next we'll buffer up to that length.
+          rowState = ROW_CHUNK_BY_LENGTH;
+        } else {
+          rowLength = (rowLength << 4) | (byte > 96 ? byte - 87 : byte - 48);
+        }
+        continue;
+      }
+      case ROW_CHUNK_BY_NEWLINE: {
+        // We're looking for a newline
+        lastIdx = chunk.indexOf(10 /* "\n" */, i);
+        break;
+      }
+      case ROW_CHUNK_BY_LENGTH: {
+        // We're looking for the remaining byte length
+        if (i + rowLength <= chunk.length) {
+          lastIdx = i + rowLength;
+        }
+        break;
+      }
+    }
+    if (lastIdx > -1) {
+      // We found the last chunk of the row
+      const offset = chunk.byteOffset + i;
+      const length = lastIdx - i;
+      const lastChunk = new Uint8Array(chunk.buffer, offset, length);
+      processFullRow(response, rowID, rowTag, buffer, lastChunk);
+      // Reset state machine for a new row
+      rowState = ROW_ID;
+      rowTag = 0;
+      rowID = 0;
+      rowLength = 0;
+      buffer.length = 0;
+      i = lastIdx + 1;
+    } else {
+      // The rest of this row is in a future chunk. We stash the rest of the
+      // current chunk until we can process the full row.
+      const offset = chunk.byteOffset + i;
+      const length = chunk.byteLength - i;
+      const remainingSlice = new Uint8Array(chunk.buffer, offset, length);
+      buffer.push(remainingSlice);
+      // Update how many bytes we're still waiting for. If we're looking for
+      // a newline, this doesn't hurt since we'll just ignore it.
+      rowLength -= remainingSlice.byteLength;
+      break;
+    }
   }
-  response._partialRow += readPartialStringChunk(stringDecoder, chunk);
+  response._rowState = rowState;
+  response._rowID = rowID;
+  response._rowTag = rowTag;
+  response._rowLength = rowLength;
 }
 
 function parseModel<T>(response: Response, json: UninitializedModel): T {
```
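The expression `(rowID << 4) | (byte > 96 ? byte - 87 : byte - 48)` accumulates a hexadecimal number one ASCII byte at a time: digits `'0'`–`'9'` are bytes 48–57 and lowercase `'a'`–`'f'` are bytes 97–102, so subtracting 48 or 87 yields the nibble value, which is shifted into the running total. A standalone sketch (hypothetical helper name, not from the diff):

```js
// Accumulate one ASCII hex digit into a running value, as the parser does.
function pushHexDigit(acc, byte) {
  return (acc << 4) | (byte > 96 ? byte - 87 : byte - 48);
}

let n = 0;
for (const byte of new TextEncoder().encode('1a4')) {
  n = pushHexDigit(n, byte);
}
console.log(n); // 420 (0x1a4), without any string slicing or parseInt
```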

packages/react-dom-bindings/src/server/ReactDOMLegacyServerStreamConfig.js

Lines changed: 4 additions & 0 deletions
```diff
@@ -57,6 +57,10 @@ export function clonePrecomputedChunk(
   return chunk;
 }
 
+export function byteLengthOfChunk(chunk: Chunk | PrecomputedChunk): number {
+  throw new Error('Not implemented.');
+}
+
 export function closeWithError(destination: Destination, error: mixed): void {
   // $FlowFixMe[incompatible-call]: This is an Error object or the destination accepts other types.
   destination.destroy(error);
```

packages/react-server-dom-fb/src/ReactServerStreamConfigFB.js

Lines changed: 4 additions & 0 deletions
```diff
@@ -63,6 +63,10 @@ export function clonePrecomputedChunk(
   return chunk;
 }
 
+export function byteLengthOfChunk(chunk: Chunk | PrecomputedChunk): number {
+  throw new Error('Not implemented.');
+}
+
 export function closeWithError(destination: Destination, error: mixed): void {
   destination.done = true;
   destination.fatal = true;
```
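These two configs only stub byteLengthOfChunk; the streaming configs elsewhere in this commit (not shown in this excerpt) need a real implementation so the server can emit the byte length in the T row header. A plausible sketch of the two cases, under the assumption that chunks are typed arrays on Web streams and possibly strings on Node:

```js
// Sketch only - the actual implementations live in the stream configs that
// are part of this commit but not included in this excerpt.
// Web streams: chunks are always typed arrays, so the length is free.
function byteLengthOfChunkWeb(chunk /* : Uint8Array */) {
  return chunk.byteLength;
}

// Node streams: chunks may still be strings, so measuring requires the
// (potentially slower) Buffer.byteLength helper mentioned in the commit text.
function byteLengthOfChunkNode(chunk /* : string | Uint8Array */) {
  return typeof chunk === 'string'
    ? Buffer.byteLength(chunk, 'utf8')
    : chunk.byteLength;
}
```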

packages/react-server-dom-webpack/src/__tests__/ReactFlightDOMEdge-test.js

Lines changed: 21 additions & 0 deletions
```diff
@@ -98,4 +98,25 @@ describe('ReactFlightDOMEdge', () => {
     const result = await readResult(ssrStream);
     expect(result).toEqual('<span>Client Component</span>');
   });
+
+  it('should encode long string in a compact format', async () => {
+    const testString = '"\n\t'.repeat(500) + '🙃';
+
+    const stream = ReactServerDOMServer.renderToReadableStream({
+      text: testString,
+    });
+    const [stream1, stream2] = stream.tee();
+
+    const serializedContent = await readResult(stream1);
+    // The content should be compact and unescaped
+    expect(serializedContent.length).toBeLessThan(2000);
+    expect(serializedContent).not.toContain('\\n');
+    expect(serializedContent).not.toContain('\\t');
+    expect(serializedContent).not.toContain('\\"');
+    expect(serializedContent).toContain('\t');
+
+    const result = await ReactServerDOMClient.createFromReadableStream(stream2);
+    // Should still match the result when parsed
+    expect(result.text).toBe(testString);
+  });
 });
```
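The 2000-character bound works out as follows (illustrative arithmetic, not part of the test):

```js
const testString = '"\n\t'.repeat(500) + '🙃';
console.log(testString.length); // 1502 UTF-16 units (1500 + 2 for the emoji)
console.log(JSON.stringify(testString).length); // 3004 - every ", \n, \t doubles
console.log(new TextEncoder().encode(testString).byteLength); // 1504 UTF-8 bytes
// Escaped JSON would already blow past 2000 characters, while the raw
// T row is ~1504 bytes of text plus a short "id:Thex," header.
```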

packages/react-server-dom-webpack/src/__tests__/ReactFlightDOMNode-test.js

Lines changed: 27 additions & 0 deletions
```diff
@@ -104,4 +104,31 @@ describe('ReactFlightDOMNode', () => {
     const result = await readResult(ssrStream);
     expect(result).toEqual('<span>Client Component</span>');
   });
+
+  it('should encode long string in a compact format', async () => {
+    const testString = '"\n\t'.repeat(500) + '🙃';
+
+    const stream = ReactServerDOMServer.renderToPipeableStream({
+      text: testString,
+    });
+
+    const readable = new Stream.PassThrough();
+
+    const stringResult = readResult(readable);
+    const parsedResult = ReactServerDOMClient.createFromNodeStream(readable);
+
+    stream.pipe(readable);
+
+    const serializedContent = await stringResult;
+    // The content should be compact and unescaped
+    expect(serializedContent.length).toBeLessThan(2000);
+    expect(serializedContent).not.toContain('\\n');
+    expect(serializedContent).not.toContain('\\t');
+    expect(serializedContent).not.toContain('\\"');
+    expect(serializedContent).toContain('\t');
+
+    const result = await parsedResult;
+    // Should still match the result when parsed
+    expect(result.text).toBe(testString);
+  });
 });
```
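Both readResult and createFromNodeStream attach listeners to the same PassThrough; in flowing mode Node delivers each 'data' chunk to every listener, so the raw serialized text and the parsed result come from a single pass over the stream. readResult itself is defined earlier in these test files and is not part of this diff; a minimal version might look like this (an assumption, not the actual helper):

```js
// Hypothetical sketch of the readResult test helper; the real definition
// sits earlier in the test file and is not shown in this diff.
function readResult(readable) {
  return new Promise((resolve, reject) => {
    const buffers = [];
    readable.on('data', chunk => {
      buffers.push(chunk);
    });
    readable.on('error', reject);
    // Concatenate before decoding so multi-byte characters (like the emoji
    // in the test) can't be split across chunk boundaries.
    readable.on('end', () => resolve(Buffer.concat(buffers).toString('utf8')));
  });
}
```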
