Skip to content

Commit 5eb064f

Browse files
Jacopo Beschi intrip
authored and committed
Update and re-run generation scripts
- Update comments to point to the correct files
- bin/generate_digits_characters was missing
1 parent 3653635 commit 5eb064f

File tree

5 files changed

+117
-3
lines changed

5 files changed

+117
-3
lines changed

bin/generate_digits_characters

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
#!/usr/bin/env ruby
2+
3+
require "net/http"
4+
require "csv"
5+
6+
# Retrieves all the Unicode digits grouped by script and stores them in a .csv file.
7+
8+
puts "loading..."
9+
10+
unicode_util_uri = URI("https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=%5B%3Adigit%3A%5D&g=&i=")
11+
# NOTE(review): Net::HTTP.get returns the response body without checking the status code,
# so a failed request would presumably just produce zero matches below — confirm this is acceptable.
html = Net::HTTP.get(unicode_util_uri)
12+
13+
# Based on the response HTML: captures the hexadecimal code point that follows "U+" and the
# name text (letters, hyphens and spaces) at the start of the next table cell.
# NOTE(review): the constant name mentions "character or repetition", but the pattern only
# matches code point / name pairs — possibly carried over from another script; confirm.
14+
CHARACTER_OR_REPETITION_REGEXP = %r{u\+([a-z0-9]+)\</a\>\</code\>\</td\>\<td\>([a-z\- ]+)}i
15+
digits_and_scripts = html.scan(CHARACTER_OR_REPETITION_REGEXP)
16+
17+
digits_and_scripts.map! do |digit, script|
18+
# Parse the hexadecimal code point and convert it to the corresponding UTF-8 character.
19+
unicode_digit = digit.to_i(16).chr("utf-8")
20+
# Drop the last word of the captured name, then join the remaining words with "_" and
# lowercase them (a name such as "GARAY DIGIT ZERO" would become "garay_digit").
script_name = script.split(" ").tap(&:pop).join("_").downcase
21+
[ unicode_digit, script_name ]
22+
end
23+
24+
puts "processed #{digits_and_scripts.length} characters"
25+
26+
# Serialize one "character,script_name" row per digit.
# The block parameter `csv` shadows the outer local that receives the generated String.
csv = CSV.generate do |csv|
27+
digits_and_scripts.each do |digit, script|
28+
csv << [ digit, script ]
29+
end
30+
end
31+
32+
# The output path is resolved relative to this script's directory.
File.write(File.join(__dir__, "../", "lib/homographic_spoofing/detector/rule/data/digits.csv"), csv)
33+
34+
puts "done"

lib/homographic_spoofing/detector/rule/data/allowed_idn_characters.txt

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

lib/homographic_spoofing/detector/rule/data/digits.csv

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -388,6 +388,16 @@
388388
𐴷,hanifi_rohingya_digit
389389
𐴸,hanifi_rohingya_digit
390390
𐴹,hanifi_rohingya_digit
391+
𐵀,garay_digit
392+
𐵁,garay_digit
393+
𐵂,garay_digit
394+
𐵃,garay_digit
395+
𐵄,garay_digit
396+
𐵅,garay_digit
397+
𐵆,garay_digit
398+
𐵇,garay_digit
399+
𐵈,garay_digit
400+
𐵉,garay_digit
391401
𑁦,brahmi_digit
392402
𑁧,brahmi_digit
393403
𑁨,brahmi_digit
@@ -478,6 +488,26 @@
478488
𑛇,takri_digit
479489
𑛈,takri_digit
480490
𑛉,takri_digit
491+
𑛐,myanmar_pao_digit
492+
𑛑,myanmar_pao_digit
493+
𑛒,myanmar_pao_digit
494+
𑛓,myanmar_pao_digit
495+
𑛔,myanmar_pao_digit
496+
𑛕,myanmar_pao_digit
497+
𑛖,myanmar_pao_digit
498+
𑛗,myanmar_pao_digit
499+
𑛘,myanmar_pao_digit
500+
𑛙,myanmar_pao_digit
501+
𑛚,myanmar_eastern_pwo_karen_digit
502+
𑛛,myanmar_eastern_pwo_karen_digit
503+
𑛜,myanmar_eastern_pwo_karen_digit
504+
𑛝,myanmar_eastern_pwo_karen_digit
505+
𑛞,myanmar_eastern_pwo_karen_digit
506+
𑛟,myanmar_eastern_pwo_karen_digit
507+
𑛠,myanmar_eastern_pwo_karen_digit
508+
𑛡,myanmar_eastern_pwo_karen_digit
509+
𑛢,myanmar_eastern_pwo_karen_digit
510+
𑛣,myanmar_eastern_pwo_karen_digit
481511
𑜰,ahom_digit
482512
𑜱,ahom_digit
483513
𑜲,ahom_digit
@@ -508,6 +538,16 @@
508538
𑥗,dives_akuru_digit
509539
𑥘,dives_akuru_digit
510540
𑥙,dives_akuru_digit
541+
𑯰,sunuwar_digit
542+
𑯱,sunuwar_digit
543+
𑯲,sunuwar_digit
544+
𑯳,sunuwar_digit
545+
𑯴,sunuwar_digit
546+
𑯵,sunuwar_digit
547+
𑯶,sunuwar_digit
548+
𑯷,sunuwar_digit
549+
𑯸,sunuwar_digit
550+
𑯹,sunuwar_digit
511551
𑱐,bhaiksuki_digit
512552
𑱑,bhaiksuki_digit
513553
𑱒,bhaiksuki_digit
@@ -548,6 +588,16 @@
548588
𑽗,kawi_digit
549589
𑽘,kawi_digit
550590
𑽙,kawi_digit
591+
𖄰,gurung_khema_digit
592+
𖄱,gurung_khema_digit
593+
𖄲,gurung_khema_digit
594+
𖄳,gurung_khema_digit
595+
𖄴,gurung_khema_digit
596+
𖄵,gurung_khema_digit
597+
𖄶,gurung_khema_digit
598+
𖄷,gurung_khema_digit
599+
𖄸,gurung_khema_digit
600+
𖄹,gurung_khema_digit
551601
𖩠,mro_digit
552602
𖩡,mro_digit
553603
𖩢,mro_digit
@@ -578,6 +628,26 @@
578628
𖭗,pahawh_hmong_digit
579629
𖭘,pahawh_hmong_digit
580630
𖭙,pahawh_hmong_digit
631+
𖵰,kirat_rai_digit
632+
𖵱,kirat_rai_digit
633+
𖵲,kirat_rai_digit
634+
𖵳,kirat_rai_digit
635+
𖵴,kirat_rai_digit
636+
𖵵,kirat_rai_digit
637+
𖵶,kirat_rai_digit
638+
𖵷,kirat_rai_digit
639+
𖵸,kirat_rai_digit
640+
𖵹,kirat_rai_digit
641+
𜳰,outlined_digit
642+
𜳱,outlined_digit
643+
𜳲,outlined_digit
644+
𜳳,outlined_digit
645+
𜳴,outlined_digit
646+
𜳵,outlined_digit
647+
𜳶,outlined_digit
648+
𜳷,outlined_digit
649+
𜳸,outlined_digit
650+
𜳹,outlined_digit
581651
𝟎,mathematical_bold_digit
582652
𝟏,mathematical_bold_digit
583653
𝟐,mathematical_bold_digit
@@ -658,6 +728,16 @@
658728
𞓷,nag_mundari_digit
659729
𞓸,nag_mundari_digit
660730
𞓹,nag_mundari_digit
731+
𞗱,ol_onal_digit
732+
𞗲,ol_onal_digit
733+
𞗳,ol_onal_digit
734+
𞗴,ol_onal_digit
735+
𞗵,ol_onal_digit
736+
𞗶,ol_onal_digit
737+
𞗷,ol_onal_digit
738+
𞗸,ol_onal_digit
739+
𞗹,ol_onal_digit
740+
𞗺,ol_onal_digit
661741
𞥐,adlam_digit
662742
𞥑,adlam_digit
663743
𞥒,adlam_digit

lib/homographic_spoofing/detector/rule/disallowed_characters.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -128,7 +128,7 @@ def allowed_chars_set
128128
end
129129

130130
private
131-
# Built with script/development/generate_allowed_idn_characters.rb
131+
# Built with bin/generate_allowed_idn_characters
132132
# Returns the contents of data/allowed_idn_characters.txt, resolved relative to this file's directory.
def read_allowed_idn_chars
133133
File.read("#{__dir__}/data/allowed_idn_characters.txt")
134134
end

lib/homographic_spoofing/detector/rule/mixed_digits.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ def build_digits_map
2323
end
2424
end
2525

26-
# Built with script/development/generate_digits_characters.rb
26+
# Built with bin/generate_digits_characters
2727
# Returns the contents of data/digits.csv, resolved relative to this file's directory.
def read_digits
2828
File.read("#{__dir__}/data/digits.csv")
2929
end

0 commit comments

Comments
 (0)