From c0a7696c71435d0a00f9fdc55f6e0388650939d1 Mon Sep 17 00:00:00 2001 From: Christian Helmuth Date: Tue, 28 Sep 2021 10:03:45 +0200 Subject: [PATCH] tool/dts/extract: convert regex strings to latin1 The former encoding was UTF-8, which works quite well if LC_CTYPE is ensured to be a UTF-8 codeset (e.g., en_US.UTF-8 or C.UTF-8). But, if LC_CTYPE is set to C or latin1 for example, the Tcl regex library enters an infinite loop because of unexpected characters used as markers in the strings (e.g., SECTION SIGN U+00A7). Therefore, the extract tool was converted to latin1 with the following commands and now works for LC_CTYPE C and UTF-8 codesets. iconv -f utf-8 -t latin1 tool/dts/extract > /tmp/e cp /tmp/e tool/dts/extract --- tool/dts/extract | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/tool/dts/extract b/tool/dts/extract index 7eba61e77d..499bfed2b2 100755 --- a/tool/dts/extract +++ b/tool/dts/extract @@ -132,7 +132,7 @@ proc sub_token {token token_type} { global tok_text if {$token == ""} { return "" } - if {[regexp "§($token_type\\d+)°" $tok_text($token) dummy sub_token]} { + if {[regexp "§($token_type\\d+)°" $tok_text($token) dummy sub_token]} { return $sub_token } else { return "" @@ -153,11 +153,11 @@ proc labels_referenced_by_node {token} { while {$output != ""} { # consume plain text - if {[regexp {^[^§]+} $output plain]} { - regsub {^[^§]+} $output "" output } + if {[regexp {^[^§]+} $output plain]} { + regsub {^[^§]+} $output "" output } # consume token - if {[regexp {§(.+?)°} $output dummy subtoken]} { + if {[regexp {§(.+?)°} $output dummy subtoken]} { # collect label reference if {[tok_type $subtoken] == "reflabelname"} { @@ -167,7 +167,7 @@ proc labels_referenced_by_node {token} { if {[tok_type $subtoken] != "node"} { set result [concat $result [labels_referenced_by_node $subtoken]] } - regsub {§(.+?)°} $output "" output + regsub {§(.+?)°} $output "" output } } return [lsort -unique $result] @@ -199,7
+199,7 @@ proc collect_label_and_references_of_node {token path} { set selected($path) 0 - if {[regexp {§(labeldef\d+)°} $node_text dummy]} { + if {[regexp {§(labeldef\d+)°} $node_text dummy]} { set label_name $tok_text([sub_token [sub_token $token labeldef] labelname]) set labels($label_name) $path } @@ -247,11 +247,11 @@ proc collect_labels_and_references {{token content0} {curr_path ""}} { while {$output != ""} { # consume plain text - if {[regexp {^[^§]+} $output plain]} { - regsub {^[^§]+} $output "" output } + if {[regexp {^[^§]+} $output plain]} { + regsub {^[^§]+} $output "" output } # consume token - if {[regexp {§(.+?)°} $output dummy token]} { + if {[regexp {§(.+?)°} $output dummy token]} { # try to enter node or nodesupplement set path [sub_node_path $token $curr_path] @@ -259,7 +259,7 @@ proc collect_labels_and_references {{token content0} {curr_path ""}} { if {$path != $curr_path} { collect_label_and_references_of_node $token $path } - regsub {§(.+?)°} $output "" output + regsub {§(.+?)°} $output "" output } } } @@ -274,19 +274,19 @@ proc dump_selected_source {{token content0} {curr_path ""}} { while {$output != ""} { # consume plain text - if {[regexp {^[^§]+} $output plain]} { - regsub -all {³} $plain "\\&" plain + if {[regexp {^[^§]+} $output plain]} { + regsub -all {³} $plain "\\&" plain if {[info exists selected($curr_path)] && $selected($curr_path)} { puts -nonewline $plain } - regsub {^[^§]+} $output "" output + regsub {^[^§]+} $output "" output } # consume token - if {[regexp {§(.+?)°} $output dummy token]} { + if {[regexp {§(.+?)°} $output dummy token]} { dump_selected_source $token [sub_node_path $token $curr_path] - regsub {§(.+?)°} $output "" output + regsub {§(.+?)°} $output "" output } } }