Note that this is the follow up question of Parse text file, change some strings to camel case, add other strings . The parsing rules are similar but different:
- The input order in the output is important.
- The input records are separated by empty lines.
- Strings just before the character '=' are considered similar either from the start or end of the strings.
- Replace the string "public static final String" with the string "export const" if that string occurs only once.
- Replace the string "public static final String" with the string "export enum" if similar strings (which has understores) occur more than once. Change all similar strings to the camel case string Str1. Append Str1 to the string "export enum".
- Keep only the string difference Str2.
- If the new strings contain only number, prefix Str2 with the camel case string Str1.
- Enclose the new strings with "{}" only once.
- Change ';' to ','.
- Replace the string "public static final int" with the string "export const" if that string occurs only once.
- Replace the string "public static final int" with the string "export enum" if similar strings (which has understores) occur more than once.
- Separate the parsed string by the first '_' character into two tokens. The first token is T1. The second token is changed to the camel case string Str1. Append Str1 to the string "export enum".
- If the new strings contain only number, prefix T1 with the camel case string Str1.
- Enclose the new strings with "{}" only once.
- Change ';' to ','.
These are sample input and output.
input
//Comment
public static final String CUSTOMER_TYPE_CD_T_01 = "01";
public static final String CUSTOMER_TYPE_CD_TB_02 = "02";
public static final String CUSTOMER_TYPE_CD_TCC_03 = "03";
public static final String CUSTOMER_TYPE_CD_TDDD_04 = "04";
public static final String TEST_ING = "TEST";
//----------------------------------------
//Comments
//----------------------------------------
public static final int BEGIN_A_BB_C_D_EE_FFF_01 = 0;
public static final int END_A_BB_C_D_EE_FFF_01 = 2;
output
//Comment
export enum CustomerTypeCd {
T_01 = "01",
TB_02 = "02",
TCC_03 = "03",
TDDD_04 = "04",
}
export const TEST_ING = "TEST";
//----------------------------------------
//Comments
//----------------------------------------
export enum ABbCDEeFff01 {
BEGIN = 0,
END = 2,
}
I modified the answer of: Parse text file, change some strings to camel case, add other strings as follows. It handles rules 1 and 2, fails to handle rules 3 and 4:
function cap(s) { return substr(s, 1, 1) tolower(substr(s, 2)) } # capitalization
function cc(s, a, b, n, i) { # return a[1] = enum name, a[2] = key
n = split(s, b, /_/); a[1] = ""
for(i = 1; i < n; i++) a[1] = a[1] cap(b[i]) # camel-case
a[2] = cap(b[n]) # key
}
function cc2(s, a, b, n, i) { # return a[1] = enum name, a[2] = key
n = split(s, b, /_/); a[1] = ""
for(i = 1; i < n - 1; i++) a[1] = a[1] cap(b[i]) # camel-case
a[2] = b[n - 1] "_" cap(b[n]) # key
}
function cc3(s, a, b, n, i) { # return a[1] = enum name, a[2] = key
n = split(s, b, /_/);
enumkey = b[1]
a[1] = ""
for(i = 1; i < n - 1; i++) a[1] = a[1] cap(b[i]) # camel-case
a[2] = b[n - 1] "_" cap(b[n]) # key
a[1] = enumkey
}
/public static final String/ {
# compute enum name (e), key (k), value without final ";" (v)
cc2($5, ek); e = ek[1]; k = ek[2]; v = $NF; sub(/;[[:space:]]*$/, "", v)
# if new enum name
if(!(e in seen)) { seen[e] = 1; ne += 1; ename[ne] = e; cname[ne] = $5 }
# add key and value
nk[ne] += 1; key[ne,nk[ne]] = k; val[ne,nk[ne]] = v
# key prefix if only-digits key
if(k ~ /^[0-9]+$/) pfx[ne] = e
}
/public static final int/ {
# compute enum name (e), key (k), value without final ";" (v)
cc3($5, ek); e = ek[1]; k = ek[2]; v = $NF; sub(/;[[:space:]]*$/, "", v)
# if new enum name
if(!(e in seen)) { seen[e] = 1; ne += 1; ename[ne] = e; cname[ne] = $5 }
# add key and value
nk[ne] += 1; key[ne,nk[ne]] = k; val[ne,nk[ne]] = v
# key prefix if only-digits key
if(k ~ /^[0-9]+$/) pfx[ne] = e
}
END {
for(i = 1; i <= ne; i++) { # for all enum/const
# if only one key-value pair => const
if(nk[i] == 1) print sep "export const " cname[i] " = " val[i,1] ";"
else { # enum
print sep "export enum " ename[i] " {"
for(j = 1; j <= nk[i]; j++) print "\t" pfx[i] key[i,j] " = " val[i,j] ","
print "}"
}
sep = "\n"
}
}
awk -V GNU Awk 5.0.1, API: 2.0 (GNU MPFR 4.0.2, GNU MP 6.2.0)
---------
EDIT: here is the above code formatted legibly by `gawk -o-`:
/public static final String/ {
# compute enum name (e), key (k), value without final ";" (v)
cc2($5, ek)
e = ek[1]
k = ek[2]
v = $NF
sub(/;[[:space:]]*$/, "", v)
# if new enum name
if (! (e in seen)) {
seen[e] = 1
ne += 1
ename[ne] = e
cname[ne] = $5
}
# add key and value
nk[ne] += 1
key[ne, nk[ne]] = k
val[ne, nk[ne]] = v
# key prefix if only-digits key
if (k ~ /^[0-9]+$/) {
pfx[ne] = e
}
}
/public static final int/ {
# compute enum name (e), key (k), value without final ";" (v)
cc3($5, ek)
e = ek[1]
k = ek[2]
v = $NF
sub(/;[[:space:]]*$/, "", v)
# if new enum name
if (! (e in seen)) {
seen[e] = 1
ne += 1
ename[ne] = e
cname[ne] = $5
}
# add key and value
nk[ne] += 1
key[ne, nk[ne]] = k
val[ne, nk[ne]] = v
# key prefix if only-digits key
if (k ~ /^[0-9]+$/) {
pfx[ne] = e
}
}
END {
for (i = 1; i <= ne; i++) { # for all enum/const
# if only one key-value pair => const
if (nk[i] == 1) {
print sep "export const " cname[i] " = " val[i, 1] ";"
} else { # enum
print sep "export enum " ename[i] " {"
for (j = 1; j <= nk[i]; j++) {
print "\t" pfx[i] key[i, j] " = " val[i, j] ","
}
print "}"
}
sep = "\n"
}
}
function cap(s)
{
return (substr(s, 1, 1) tolower(substr(s, 2)))
}
# capitalization
function cc(s, a, b, n, i)
{
# return a[1] = enum name, a[2] = key
n = split(s, b, /_/)
a[1] = ""
for (i = 1; i < n; i++) {
a[1] = a[1] cap(b[i]) # camel-case
}
a[2] = cap(b[n]) # key
}
function cc2(s, a, b, n, i)
{
# return a[1] = enum name, a[2] = key
n = split(s, b, /_/)
a[1] = ""
for (i = 1; i < n - 1; i++) {
a[1] = a[1] cap(b[i]) # camel-case
}
a[2] = b[n - 1] "_" cap(b[n]) # key
}
function cc3(s, a, b, n, i)
{
# return a[1] = enum name, a[2] = key
n = split(s, b, /_/)
enumkey = b[1]
a[1] = ""
for (i = 1; i < n - 1; i++) {
a[1] = a[1] cap(b[i]) # camel-case
}
a[2] = b[n - 1] "_" cap(b[n]) # key
a[1] = enumkey
}