From c0a7696c71435d0a00f9fdc55f6e0388650939d1 Mon Sep 17 00:00:00 2001
From: Christian Helmuth <christian.helmuth@genode-labs.com>
Date: Tue, 28 Sep 2021 10:03:45 +0200
Subject: [PATCH] tool/dts/extract: convert regex strings to latin1

The former encoding was UTF-8, which works quite well if LC_CTYPE is
ensured to be a UTF-8 codeset (e.g., en_US.UTF-8 or C.UTF-8). But, if
LC_CTYPE is set to C or latin1 for example, the Tcl regex library enters
an infinite loop because of unexpected characters used as markers
in the strings (e.g., SECTION SIGN U+00A7).

Therefore, the extract tool was converted to latin1 with the following
commands and now works for LC_CTYPE C and UTF-8 codesets.

   iconv -f utf-8 -t latin1 tool/dts/extract > /tmp/e
   cp /tmp/e tool/dts/extract
---
 tool/dts/extract | 30 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/tool/dts/extract b/tool/dts/extract
index 7eba61e77d..499bfed2b2 100755
--- a/tool/dts/extract
+++ b/tool/dts/extract
@@ -132,7 +132,7 @@ proc sub_token {token token_type} {
 	global tok_text
 
 	if {$token == ""} { return "" }
-	if {[regexp "ТЇ($token_type\\d+)ТА" $tok_text($token) dummy sub_token]} {
+	if {[regexp "Ї($token_type\\d+)А" $tok_text($token) dummy sub_token]} {
 		return $sub_token
 	} else {
 		return ""
@@ -153,11 +153,11 @@ proc labels_referenced_by_node {token} {
 	while {$output != ""} {
 
 		# consume plain text
-		if {[regexp {^[^ТЇ]+} $output plain]} {
-			regsub {^[^ТЇ]+} $output "" output }
+		if {[regexp {^[^Ї]+} $output plain]} {
+			regsub {^[^Ї]+} $output "" output }
 
 		# consume token
-		if {[regexp {ТЇ(.+?)ТА} $output dummy subtoken]} {
+		if {[regexp {Ї(.+?)А} $output dummy subtoken]} {
 
 			# collect label reference
 			if {[tok_type $subtoken] == "reflabelname"} {
@@ -167,7 +167,7 @@ proc labels_referenced_by_node {token} {
 			if {[tok_type $subtoken] != "node"} {
 				set result [concat $result [labels_referenced_by_node $subtoken]]
 			}
-			regsub {ТЇ(.+?)ТА} $output "" output
+			regsub {Ї(.+?)А} $output "" output
 		}
 	}
 	return [lsort -unique $result]
@@ -199,7 +199,7 @@ proc collect_label_and_references_of_node {token path} {
 
 	set selected($path) 0
 
-	if {[regexp {ТЇ(labeldef\d+)ТА} $node_text dummy]} {
+	if {[regexp {Ї(labeldef\d+)А} $node_text dummy]} {
 		set label_name $tok_text([sub_token [sub_token $token labeldef] labelname])
 		set labels($label_name) $path
 	}
@@ -247,11 +247,11 @@ proc collect_labels_and_references {{token content0} {curr_path ""}} {
 	while {$output != ""} {
 
 		# consume plain text
-		if {[regexp {^[^ТЇ]+} $output plain]} {
-			regsub {^[^ТЇ]+} $output "" output }
+		if {[regexp {^[^Ї]+} $output plain]} {
+			regsub {^[^Ї]+} $output "" output }
 
 		# consume token
-		if {[regexp {ТЇ(.+?)ТА} $output dummy token]} {
+		if {[regexp {Ї(.+?)А} $output dummy token]} {
 
 			# try to enter node or nodesupplement
 			set path [sub_node_path $token $curr_path]
@@ -259,7 +259,7 @@ proc collect_labels_and_references {{token content0} {curr_path ""}} {
 			if {$path != $curr_path} {
 				collect_label_and_references_of_node $token $path }
 
-			regsub {ТЇ(.+?)ТА} $output "" output
+			regsub {Ї(.+?)А} $output "" output
 		}
 	}
 }
@@ -274,19 +274,19 @@ proc dump_selected_source {{token content0} {curr_path ""}} {
 	while {$output != ""} {
 
 		# consume plain text
-		if {[regexp {^[^ТЇ]+} $output plain]} {
-			regsub -all {ТГ}  $plain "\\&" plain
+		if {[regexp {^[^Ї]+} $output plain]} {
+			regsub -all {Г}  $plain "\\&" plain
 
 			if {[info exists selected($curr_path)] && $selected($curr_path)} {
 				puts -nonewline $plain }
 
-			regsub {^[^ТЇ]+} $output "" output
+			regsub {^[^Ї]+} $output "" output
 		}
 
 		# consume token
-		if {[regexp {ТЇ(.+?)ТА} $output dummy token]} {
+		if {[regexp {Ї(.+?)А} $output dummy token]} {
 			dump_selected_source $token [sub_node_path $token $curr_path]
-			regsub {ТЇ(.+?)ТА} $output "" output
+			regsub {Ї(.+?)А} $output "" output
 		}
 	}
 }