Skip to content

Commit 97f31ba

Browse files
Introduce toggle for FuzzedDataProvider to only return printable strings (#268)
The change makes it possible to toggle this individually each time a `consumeStr*` method is called, which allows us to pass both lossy strings and proper ASCII strings as function inputs, instead of globally toggling ASCII-only mutations by setting the appropriate libFuzzer flag and/or using the constructor of the FuzzedDataProvider. Instead of just filtering out non-printable characters and having to re-read from the data buffer (multiple times), I opted for a (hopefully lightweight) conversion, so that each non-printable character is converted into a printable one.
1 parent f989212 commit 97f31ba

File tree

2 files changed

+91
-10
lines changed

2 files changed

+91
-10
lines changed

packages/core/FuzzedDataProvider.test.ts

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -945,6 +945,24 @@ describe("FuzzedDataProvider checks", () => {
945945
expect(strings).toContain("or si");
946946
expect(strings).toContain("t ame");
947947
});
948+
it("verifyPrintableString", () => {
	// With `printable` set, every character of the consumed string must lie
	// in the printable ASCII range (32-126).
	const provider = new FuzzedDataProvider(Buffer.from(Data));
	const consumed = provider.consumeString(1024, "ascii", true);
	for (const ch of consumed) {
		const code = ch.charCodeAt(0);
		expect(code >= 32 && code <= 126).toBeTruthy();
	}
});
956+
it("verifyNonPrintableString", () => {
	// Without the `printable` flag the same input is expected to yield at
	// least one character outside the printable ASCII range (32-126).
	const provider = new FuzzedDataProvider(Buffer.from(Data));
	const chars = Array.from(provider.consumeString(1024));
	const hasNonPrintable = chars.some((ch) => {
		const code = ch.charCodeAt(0);
		return !(code >= 32 && code <= 126);
	});
	expect(hasNonPrintable).toBeTruthy();
});
948966
});
949967

950968
const Data = Buffer.from([

packages/core/FuzzedDataProvider.ts

Lines changed: 73 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,11 @@ export class FuzzedDataProvider {
2626
private dataPtr = -1;
2727
/** The number of remaining bytes that can be consumed from the fuzzer input data. */
2828
_remainingBytes = 0;
29+
/**
30+
* A lookup table that maps every byte value (0-255) to a printable ASCII character (32-126) in a cyclical manner.
31+
* Because 256 is not a multiple of 95, characters 32-97 are each produced by three byte values and characters 98-126 by two.
32+
*/
33+
private lookupTable = new Uint8Array(256);
2934

3035
static readonly min_float = -3.4028235e38;
3136
static readonly max_float = 3.4028235e38;
@@ -41,6 +46,18 @@ export class FuzzedDataProvider {
4146
this.dataPtr = 0;
4247
this._remainingBytes = data.length;
4348
}
49+
50+
/**
51+
* Populate the lookup table with a mapping of input values to output characters
52+
*/
53+
let nextChar = 32;
54+
for (let i = 0; i < 256; i++) {
55+
this.lookupTable[i] = nextChar;
56+
nextChar++;
57+
if (nextChar > 126) {
58+
nextChar = 32;
59+
}
60+
}
4461
}
4562

4663
/**
@@ -372,33 +389,76 @@ export class FuzzedDataProvider {
372389
* is not sufficiently long.
373390
* @param maxLength the maximum length of the string
374391
* @param encoding the encoding of the string
392+
* @param printable - when `true`, the consumed bytes are mapped into the printable ASCII
393+
* range (32-126) before being decoded; defaults to `false`
375394
* @returns a `string` of length between 0 and `maxLength` (inclusive)
376395
*/
377396
consumeString(
378397
maxLength: number,
379-
encoding: BufferEncoding | undefined = "ascii"
398+
encoding: BufferEncoding | undefined = "ascii",
399+
printable: boolean | undefined = false
380400
): string {
381401
if (maxLength < 0) throw new Error("maxLength must be non-negative");
402+
let result;
382403
const arrayLength = Math.min(maxLength, this._remainingBytes);
383-
const result = this.data.toString(
384-
encoding,
385-
this.dataPtr,
386-
this.dataPtr + arrayLength
387-
);
404+
405+
if (printable) {
406+
result = this.bufToPrintableString(
407+
this.data,
408+
this.dataPtr,
409+
this.dataPtr + arrayLength,
410+
encoding
411+
);
412+
} else {
413+
result = this.data.toString(
414+
encoding,
415+
this.dataPtr,
416+
this.dataPtr + arrayLength
417+
);
418+
}
388419
this.dataPtr += arrayLength;
389420
this._remainingBytes -= arrayLength;
390421
return result;
391422
}
392423

424+
/**
 * Helper function that converts a slice of `buf` into a string whose
 * underlying bytes all lie in the printable ASCII range (32-126).
 * Every byte of the slice is remapped through `lookupTable`. With the table
 * built in the constructor (byte `b` becomes `32 + b % 95`), bytes that are
 * already printable are changed as well.
 * Known limitations:
 * numbers [32; 97] will have the probability of about 0.01172 of occurring,
 * numbers [98; 126] will have probability of 0.00781 of occurring.
 * @param buf - Buffer that contains arbitrary values
 * @param min - lower bound (inclusive) at which processing of the provided `Buffer` shall begin
 * @param max - upper bound (exclusive), analogous to the lower bound
 * @param encoding - a valid `BufferEncoding` used to decode the remapped bytes
 * @returns the remapped bytes decoded with `encoding`
 */
bufToPrintableString(
	buf: Buffer,
	min: number,
	max: number,
	encoding: BufferEncoding
): string {
	// Clamp the window to the buffer so that an out-of-range index can never
	// write an unmapped (NUL) byte into the result or request a negative size.
	const start = Math.max(0, min);
	const end = Math.min(buf.length, max);
	const printable = Buffer.alloc(Math.max(0, end - start));
	for (let i = start; i < end; i++) {
		printable[i - start] = this.lookupTable[buf[i]];
	}
	// Decode with Buffer#toString instead of TextDecoder: TextDecoder only
	// accepts WHATWG encoding labels and throws a RangeError for several
	// BufferEncoding values (e.g. "hex", "base64", "binary", "ucs2").
	return printable.toString(encoding);
}
449+
393450
/**
394451
* Consumes the remaining bytes of the fuzzer input as a string.
395452
* @param encoding - the encoding of the string
453+
* @param printable - when `true`, the consumed bytes are mapped into the printable ASCII
454+
* range (32-126) before being decoded; defaults to `false`
396455
* @returns a string constructed from the remaining bytes of the fuzzer input using the given encoding
397456
*/
398457
consumeRemainingAsString(
399-
encoding: BufferEncoding | undefined = "ascii"
458+
encoding: BufferEncoding | undefined = "ascii",
459+
printable: boolean | undefined = false
400460
): string {
401-
return this.consumeString(this._remainingBytes, encoding);
461+
return this.consumeString(this._remainingBytes, encoding, printable);
402462
}
403463

404464
/**
@@ -408,16 +468,19 @@ export class FuzzedDataProvider {
408468
* @param maxArrayLength the maximum length of the array
409469
* @param maxStringLength the maximum length of the strings
410470
* @param encoding the encoding of the strings
471+
* @param printable - when `true`, the bytes of every consumed string are mapped into the
472+
* printable ASCII range (32-126) before being decoded; defaults to `false`
411473
* @returns an array containing strings constructed from the remaining bytes of the fuzzer input using the given encoding
412474
*/
413475
consumeStringArray(
414476
maxArrayLength: number,
415477
maxStringLength: number,
416-
encoding: BufferEncoding | undefined = "ascii"
478+
encoding: BufferEncoding | undefined = "ascii",
479+
printable: boolean | undefined = false
417480
) {
418481
const strs = [];
419482
while (strs.length < maxArrayLength && this.remainingBytes > 0) {
420-
const str = this.consumeString(maxStringLength, encoding);
483+
const str = this.consumeString(maxStringLength, encoding, printable);
421484
if (str) {
422485
strs.push(str);
423486
}

0 commit comments

Comments
 (0)