[PATCH] mpers: implement gawk 3 support

Sat Jan 20 13:53:29 UTC 2018

On Thu, Jan 18, 2018 at 10:41:54AM +0100, Eugene Syromyatnikov wrote:
> Some old systems that still make some sense to be supported have only
> gawk 3, so let's support them for now.
> 
> In order to achieve that, multiple changes have been implemented:
>  - Multidimensional arrays are replaced with single-dimensional ones.
>    In most places it's a "][" -> ", " replacement, as awk allows some
>    kind of emulation of multidimensional arrays that way, but in several
>    occasions (specifically for storing name and special fields) we have
>    to iterate over them later, so we store that information in
>    additional arrays in order to get the keys.
>  - "switch" statements are replaced with sets of "if ... else if ...
>    else" statements.  This change is trivial, except we've added
>    a temporary variable in what_is in order to store the expression
>    value, for readability purposes.
>  - No support for array iteration ordering.  This one is the ugliest of
>    them all.  Luckily, it's not that ugly: we've just had to process the
>    index a bit in order to make it lexicographically sortable and add two
>    temporary arrays containing sorted indices in order to iterate over
>    them instead of those two arrays that we've added in order to work
>    around the lack of multidimensional array support.
> 
> * mpers.awk (compare_indices): Remove unused function.
> (array_get, update_upper_bound, /^DW_AT_data_member_location/,
> /^DW_AT_byte_size/, /^DW_AT_encoding/): Replace multidimensional array
> access with comma-concatenated index.
> (norm_idx): New function.
> (array_seq): Replace multidimensional array access with
> comma-concatenated index.  Use comma-concatenated pair of (array_idx,
> "seq") in order to check presence of the item in an array.
> (what_is): Add enc local variable.  Store the value of array[what_idx,
> "encoding"] in it.  Replace "switch" statements with sets of "if ...
> else if ... else" statements.  Replace multidimensional array access
> with comma-concatenated index. Use for (... ; ...; ...) iteration over
> aparents_sorted instead of iteration over array.
> (/^<[[:xdigit:]]+>/): Store idx as norm_idx(matches[2]).  Replace
> multidimensional array access with comma-concatenated index.  Store an
> additional flag in array_names array.
> (/^DW_AT_name/): Replace multidimensional array access with
> comma-concatenated index.  Add a flag to array_names for that idx.
> (/^DW_AT_type/): Do not capture "0x" as a part of a group, normalise
> the captured group.  Replace multidimensional array access with
> comma-concatenated index.
> (/^Abbrev Number:[^(]+\(DW_TAG_/): Replace multidimensional array access
> with comma-concatenated index.  Store additional flags in
> array_special and array_parents arrays.
> (END): Remove PROCINFO["sorted_in"] setup.  Sort array_parents.  Replace
> multidimensional array access with comma-concatenated index.  Iterate
> over array_special to go over all the items that have "special" field.
> Iterate over array_names to go over all items that have "name" field.
> ---
>  mpers.awk | 129 +++++++++++++++++++++++++++-----------------------------------
>  1 file changed, 56 insertions(+), 73 deletions(-)
> 
> diff --git a/mpers.awk b/mpers.awk
> index 9f8cb64..9359004 100644
> --- a/mpers.awk
> +++ b/mpers.awk
> @@ -27,19 +27,10 @@
>  # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
>  # THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
>  
> -function compare_indices(i1, v1, i2, v2, \
> -			 c1, c2)
> -{
> -	c1 = strtonum(sprintf("%s", i1))
> -	c2 = strtonum(sprintf("%s", i2))
> -	if (c1 < c2)
> -		return -1
> -	return (c1 != c2)
> -}
>  function array_get(array_idx, array_member, \
>  		   array_return)
>  {
> -	array_return = array[array_idx][array_member]
> +	array_return = array[array_idx, array_member]
>  	if ("" == array_return) {
>  		printf("%s: index [%s] without %s\n",
>  		       FILENAME, array_idx, array_member) > "/dev/stderr"
> @@ -47,12 +38,16 @@ function array_get(array_idx, array_member, \
>  	}
>  	return array_return
>  }
> +function norm_idx(idx)
> +{
> +	return sprintf("%016s", idx)
> +}
>  function array_seq(array_idx)
>  {
> -	if ("seq" in array[array_idx])
> -		return array[array_idx]["seq"]
> +	if ((array_idx, "seq") in array)
> +		return array[array_idx, "seq"]
>  	index_seq++
> -	array[array_idx]["seq"] = index_seq
> +	array[array_idx, "seq"] = index_seq
>  	return index_seq
>  }
>  function enter(array_idx,
> @@ -75,72 +70,63 @@ function leave(array_idx, to_return)
>  function update_upper_bound(idx, val, \
>  			    count)
>  {
> -	count = array[idx]["count"]
> +	count = array[idx, "count"]
>  	if (count == "")
>  		count = 1
> -	array[idx]["count"] = count * val
> -	array[idx]["upper_bound"] = array[idx]["upper_bound"] "[" val "]"
> +	array[idx, "count"] = count * val
> +	array[idx, "upper_bound"] = array[idx, "upper_bound"] "[" val "]"
>  }
>  function what_is(what_idx, \
>  		 item, loc_diff, location, prev_location, prev_returned_size, \
> -		 special, to_return, type_idx)
> +		 special, to_return, type_idx, enc)
>  {
>  	enter(what_idx)
>  	special = array_get(what_idx, "special")
> -	switch (special) {
> -	case "base_type":
> -		switch (array_get(what_idx, "encoding")) {
> -		case 5: # signed
> +	if (special == "base_type") {
> +		enc = array_get(what_idx, "encoding")
> +		if (enc == 5) { # signed
>  			printf("int%s_t ",
>  			       8 * array_get(what_idx, "byte_size"))
> -			break
> -		case 7: # unsigned
> +		} else if (enc == 7) { # unsigned
>  			printf("uint%s_t ",
>  			       8 * array_get(what_idx, "byte_size"))
> -			break
> -		default: # float, signed/unsigned char
> +		} else { # float, signed/unsigned char
>  			printf("%s ", array_get(what_idx, "name"))
> -			break
>  		}
>  		returned_size = array_get(what_idx, "byte_size")
> -		break
> -	case "enumeration_type":
> +	} else if (special == "enumeration_type") {
>  		returned_size = array_get(what_idx, "byte_size")
>  		printf("uint%s_t ", 8 * returned_size)
> -		break
> -	case "pointer_type":
> +	} else if (special == "pointer_type") {
>  		printf("mpers_ptr_t ")
>  		returned_size = array_get(what_idx, "byte_size")
> -		break
> -	case "array_type":
> +	} else if (special == "array_type") {
>  		type_idx = array_get(what_idx, "type")
>  		what_is(type_idx)
> -		to_return = array[what_idx]["upper_bound"]
> +		to_return = array[what_idx, "upper_bound"]
>  		if ("" == to_return)
>  			to_return = "[0]"
> -		returned_size = array[what_idx]["count"] * returned_size
> +		returned_size = array[what_idx, "count"] * returned_size
>  		return leave(what_idx, to_return)
> -		break
> -	case "structure_type":
> +	} else if (special == "structure_type") {
>  		print "struct {"
>  		prev_location = 0
>  		location = 0
>  		returned_size = 0
>  		prev_returned_size = 0
> -		for (item in array) {
> -			if ("parent" in array[item] && \
> -				array_get(item, "parent") == what_idx) {
> -				location = array_get(item, "location")
> +		for (item = 1; item <= parents_cnt; item+=1) {

The "item" variable is no longer an item here, it's an index into
aparents_sorted, so the name is misleading.

> +			if (array_parents[aparents_sorted[item]] == what_idx) {

aparents_sorted is not a sorted copy of array_parents; it's an array
of the sorted keys of array_parents, so let's call it aparents_keys or
something similar.

> +				location = array_get(aparents_sorted[item], "location")
>  				loc_diff = location - prev_location - \
>  					prev_returned_size
>  				if (loc_diff != 0) {
>  					printf("unsigned char mpers_%s_%s[%s];\n",
> -					       "filler", array_seq(item), loc_diff)
> +					       "filler", array_seq(aparents_sorted[item]), loc_diff)
>  				}
>  				prev_location = location
> -				returned = what_is(item)
> +				returned = what_is(aparents_sorted[item])
>  				prev_returned_size = returned_size
> -				printf("%s%s;\n", array[item]["name"], returned)
> +				printf("%s%s;\n", array[aparents_sorted[item], "name"], returned)
>  			}
>  		}
>  		returned_size = array_get(what_idx, "byte_size")
> @@ -150,31 +136,25 @@ function what_is(what_idx, \
>  			       "end_filler", array_seq(item), loc_diff)
>  		}
>  		printf("} ATTRIBUTE_PACKED ")
> -		break
> -	case "union_type":
> +	} else if (special == "union_type") {
>  		print "union {"
> -		for (item in array) {
> -			if ("parent" in array[item] && \
> -				array_get(item, "parent") == what_idx) {
> -				returned = what_is(item)
> -				printf("%s%s;\n", array[item]["name"], returned)
> +		for (item = 1; item <= parents_cnt; item+=1) {

Here, too, the "item" variable is no longer an item, it's an index into
aparents_sorted.

> +			if (array_parents[aparents_sorted[item]] == what_idx) {
> +				returned = what_is(aparents_sorted[item])
> +				printf("%s%s;\n", array[aparents_sorted[item], "name"], returned)
>  			}
>  		}
>  		printf("} ")
>  		returned_size = array_get(what_idx, "byte_size")
> -		break
> -	case "typedef":
> +	} else if (special == "typedef") {
>  		type_idx = array_get(what_idx, "type")
>  		return leave(what_idx, what_is(type_idx))
> -		break
> -	case "member":
> +	} else if (special == "member") {
>  		type_idx = array_get(what_idx, "type")
>  		return leave(what_idx, what_is(type_idx))
> -		break
> -	default:
> +	} else {
>  		type_idx = array_get(what_idx, "type")
>  		what_is(type_idx)
> -		break
>  	}
>  	return leave(what_idx, "")
>  }
> @@ -186,31 +166,32 @@ BEGIN {
>  /^<[[:xdigit:]]+>/ {
>  	match($0, /([[:alnum:]]+)><([[:alnum:]]+)/, matches)
>  	level = matches[1]
> -	idx = "0x" matches[2]
> -	array[idx]["idx"] = idx
> +	idx = norm_idx(matches[2])
> +	array[idx, "idx"] = idx
>  	parent[level] = idx
>  }
>  /^DW_AT_data_member_location/ {
>  	if (!match($0, /\(DW_OP_plus_uconst:[[:space:]]+([[:digit:]]+)\)/, temparray))
>  		match($0, /([[:digit:]]+)/, temparray)
> -	array[idx]["location"] = temparray[1]
> +	array[idx, "location"] = temparray[1]
>  }
>  /^DW_AT_name/ {
>  	match($0, /:[[:space:]]+([[:alpha:]_][[:alnum:]_[:space:]]*)/, \
>  		temparray)
> -	array[idx]["name"] = temparray[1]
> +	array_names[idx] = 1
> +	array[idx, "name"] = temparray[1]
>  }
>  /^DW_AT_byte_size/ {
>  	match($0, /[[:digit:]]+/, temparray)
> -	array[idx]["byte_size"] = temparray[0]
> +	array[idx, "byte_size"] = temparray[0]
>  }
>  /^DW_AT_encoding/ {
>  	match($0, /[[:digit:]]+/, temparray)
> -	array[idx]["encoding"] = temparray[0]
> +	array[idx, "encoding"] = temparray[0]
>  }
>  /^DW_AT_type/ {
> -	match($0, /:[[:space:]]+<(0x[[:xdigit:]]*)>$/, temparray)
> -	array[idx]["type"] = temparray[1]
> +	match($0, /:[[:space:]]+<0x([[:xdigit:]]*)>$/, temparray)
> +	array[idx, "type"] = norm_idx(temparray[1])
>  }
>  /^DW_AT_upper_bound/ {
>  	match($0, /[[:digit:]]+/, temparray)
> @@ -223,17 +204,19 @@ BEGIN {
>  /^Abbrev Number:[^(]+\(DW_TAG_/ {
>  	if (match($0, /typedef|union_type|structure_type|pointer_type\
>  |enumeration_type|array_type|base_type|member/, temparray)) {
> -		array[idx]["special"] = temparray[0]
> +		array_special[idx] = temparray[0]
> +		array[idx, "special"] = temparray[0]
>  		if ("pointer_type" == temparray[0])
> -			array[idx]["byte_size"] = default_pointer_size
> +			array[idx, "byte_size"] = default_pointer_size
>  		if (level > 1 && "member" == temparray[0])
> -			array[idx]["parent"] = parent[level-1]
> +			array_parents[idx] = parent[level-1]
>  	}
>  }
>  END {
> -	PROCINFO["sorted_in"] = "compare_indices"
> -	for (item in array) {
> -		if (array[item]["special"] == "pointer_type") {
> +	parents_cnt = asorti(array_parents, aparents_sorted)
> +
> +	for (item in array_special) {
> +		if (array[item, "special"] == "pointer_type") {
>  			mpers_ptr_t = \
>  				"uint" 8 * array_get(item, "byte_size") "_t"
>  			print "#ifndef mpers_ptr_t_is_" mpers_ptr_t
> @@ -243,8 +226,8 @@ END {
>  			break
>  		}
>  	}
> -	for (item in array) {
> -		if (array[item]["name"] == VAR_NAME) {
> +	for (item in array_names) {
> +		if (array[item, "name"] == VAR_NAME) {
>  			type = array_get(item, "type")
>  			print "typedef"
>  			what_is(type)

Overall, this looks plausible, although it is less readable than the
gawk 4 edition.

-- 
ldv
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 801 bytes
Desc: not available
URL: <http://lists.strace.io/pipermail/strace-devel/attachments/20180120/b6507b64/attachment.bin>