Skip to content

Commit 0836cf6

Browse files
authored
Change URL detection regex to exclude trailing content (#6572)
1 parent d9f056e commit 0836cf6

File tree

3 files changed

+33
-13
lines changed

3 files changed

+33
-13
lines changed

playground/Stress/Stress.ApiService/ConsoleStresser.cs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,11 @@ public static void Stress()
7878
Console.WriteLine("https://www.example.com/path/with/percent%25encoded");
7979
Console.WriteLine("https://www.example.com/path/with/dollar$sign");
8080
Console.WriteLine("https://www.example.com/path/with/exclamation!mark");
81+
Console.WriteLine("https://www.example.com/;path/");
82+
Console.WriteLine("https://www.example.com/path/?query;string");
83+
Console.WriteLine("https://;www.example.com/");
84+
Console.WriteLine("https://www;.example.com/");
85+
Console.WriteLine("https://www.exa;mple.com/");
8186

8287
Console.Write("\x1b[0m"); // reset color
8388

src/Aspire.Dashboard/ConsoleLogs/UrlParser.cs

Lines changed: 5 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
using System.Diagnostics.CodeAnalysis;
55
using System.Globalization;
6+
using System.Net;
67
using System.Text;
78
using System.Text.RegularExpressions;
89

@@ -34,7 +35,7 @@ public static bool TryParse(string? text, Func<string, string>? nonMatchFragment
3435
nextCharIndex = urlMatch.Index + urlMatch.Length;
3536
var url = text[urlStart..nextCharIndex];
3637

37-
builder.Append(CultureInfo.InvariantCulture, $"<a target=\"_blank\" href=\"{url}\">{url}</a>");
38+
builder.Append(CultureInfo.InvariantCulture, $"<a target=\"_blank\" href=\"{url}\">{WebUtility.HtmlEncode(url)}</a>");
3839
urlMatch = urlMatch.NextMatch();
3940
}
4041

@@ -65,17 +66,9 @@ static void AppendNonMatchFragment(StringBuilder stringBuilder, Func<string, str
6566
}
6667

6768
// Regular expression that detects http/https URLs in a log entry
68-
// Based on the RegEx used in Windows Terminal for the same purpose. Some modifications:
69-
// - Can start at a non word boundary. This behavior is similar to how GitHub matches URLs in pretty printed code.
70-
// - Limited to only http/https URLs.
71-
// - Ignore case. That means it matches URLs starting with http and HTTP.
72-
//
73-
// Explanation:
74-
// https?:// - http:// or https://
75-
// [-A-Za-z0-9+&@#/%?=~_|$!:,.;]* - Any character in the list, matched zero or more times.
76-
// [A-Za-z0-9+&@#/%=~_|$] - Any character in the list, matched exactly once
69+
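// Based on the regex used by GitHub to detect links in content. The match stops at the first character outside the allowed set (for example ';'), so trailing content is excluded from the URL.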
7770
[GeneratedRegex(
78-
"https?://[-A-Za-z0-9+&@#/%?=~_|$!:,.;]*[A-Za-z0-9+&@#/%=~_|$]",
79-
RegexOptions.IgnoreCase | RegexOptions.CultureInvariant)]
71+
@"((?<!\+)https?:\/\/(?:www\.)?(?:[-\p{L}.]+?[.@][a-zA-Z\d]{2,}|localhost)(?:[-\w\p{L}.:%+~#*$!?&/=@]*(?:,(?!\s))*?)*)",
72+
RegexOptions.IgnoreCase | RegexOptions.CultureInvariant | RegexOptions.ExplicitCapture)]
8073
public static partial Regex GenerateUrlRegEx();
8174
}

tests/Aspire.Dashboard.Tests/ConsoleLogsTests/UrlParserTests.cs

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ public void TryParse_ReturnsCorrectResult(string input, bool expectedResult, str
4040
[InlineData("http://bing.com/", "<a target=\"_blank\" href=\"http://bing.com/\">http://bing.com/</a>")]
4141
[InlineData("http://bing.com/dir", "<a target=\"_blank\" href=\"http://bing.com/dir\">http://bing.com/dir</a>")]
4242
[InlineData("http://bing.com/index.aspx", "<a target=\"_blank\" href=\"http://bing.com/index.aspx\">http://bing.com/index.aspx</a>")]
43-
[InlineData("http://bing", "<a target=\"_blank\" href=\"http://bing\">http://bing</a>")]
43+
[InlineData("http://localhost", "<a target=\"_blank\" href=\"http://localhost\">http://localhost</a>")]
4444
public void TryParse_SupportedUrlFormats(string input, string? expectedOutput)
4545
{
4646
var result = UrlParser.TryParse(input, WebUtility.HtmlEncode, out var modifiedText);
@@ -71,6 +71,15 @@ public void TryParse_ExcludeInvalidTrailingChars(string input, string? expectedO
7171
Assert.Equal(expectedOutput, modifiedText);
7272
}
7373

74+
[Fact]
75+
public void TryParse_QueryString()
76+
{
77+
var result = UrlParser.TryParse("https://www.example.com?query=string&param=value", WebUtility.HtmlEncode, out var modifiedText);
78+
Assert.True(result);
79+
80+
Assert.Equal("<a target=\"_blank\" href=\"https://www.example.com?query=string&param=value\">https://www.example.com?query=string&amp;param=value</a>", modifiedText);
81+
}
82+
7483
[Theory]
7584
[InlineData("http://www.localhost:8080")]
7685
[InlineData("HTTP://WWW.LOCALHOST:8080")]
@@ -83,4 +92,17 @@ public void GenerateUrlRegEx_MatchUrlAfterContent(string content)
8392
var match = regex.Match(content);
8493
Assert.Equal("http://www.localhost:8080", match.Value.ToLowerInvariant());
8594
}
95+
96+
[Theory]
97+
[InlineData("http://www.localhost:8080!", "http://www.localhost:8080!")]
98+
[InlineData("http://www.localhost:8080/path!", "http://www.localhost:8080/path!")]
99+
[InlineData("http://www.localhost:8080/path;", "http://www.localhost:8080/path")]
100+
[InlineData("http://www.localhost:8080;", "http://www.localhost:8080")]
101+
[InlineData("http://www.local;host:8080;", "http://www.local")]
102+
public void GenerateUrlRegEx_MatchUrlBeforeContent(string content, string expected)
103+
{
104+
var regex = UrlParser.GenerateUrlRegEx();
105+
var match = regex.Match(content);
106+
Assert.Equal(expected, match.Value.ToLowerInvariant());
107+
}
86108
}

0 commit comments

Comments
 (0)