#!/usr/bin/env bash
#
# Solution to Exercises 1,2,3,4 - sed
# Course "Sicurezza", undergraduate degree in Computer Science
# Ca' Foscari University, Venice
# https://secgroup.dais.unive.it/teaching/sicurezza/


###################################
# Exercise 1: Given a list of telephone numbers of the form 123456789 use sed to rewrite
# them as (123)456-789. Anything in the wrong format should be left unmodified.

echo '================= SOLUTION 1.1 =================='
# Long-hand solution: three capture groups of three digits each, rewritten as
# (\1)\2-\3. The pattern is anchored with ^ and $ so that only lines made of
# exactly nine digits are rewritten; every other line is printed unchanged.
# No g flag is needed: an anchored pattern can match at most once per line.
sed 's/^\([[:digit:]][[:digit:]][[:digit:]]\)\([[:digit:]][[:digit:]][[:digit:]]\)\([[:digit:]][[:digit:]][[:digit:]]\)$/(\1)\2-\3/' numeri.txt

echo '================= SOLUTION 1.2 =================='
# Same idea, but each group is written with the interval expression \{3\}
# ("repeat the previous item exactly three times"). Intervals are part of
# POSIX basic regular expressions, so this is not GNU-specific.
sed 's/^\([[:digit:]]\{3\}\)\([[:digit:]]\{3\}\)\([[:digit:]]\{3\}\)$/(\1)\2-\3/' numeri.txt


###################################
# Exercise 2: break ROT13. The rot.txt text has been encrypted by replacing each letter with the one 13
# positions ahead in the alphabet (modulo 26) aka ROT13. Break it with sed!
# Hint: Check out command y.

echo '================= SOLUTION 2 ===================='
# The y command maps each character of its first list to the character in the
# same position of the second list. Pairing the alphabet with itself rotated by
# 13 positions gives ROT13; since 13 + 13 = 26 the same command both encrypts
# and decrypts. Upper-case letters are mapped as well so that capitalised text
# is decoded too; every other character is left untouched.
sed 'y/abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ/nopqrstuvwxyzabcdefghijklmNOPQRSTUVWXYZABCDEFGHIJKLM/' rot.txt

###################################
# Exercise 3: filename conversion. Use sed to select and convert all file names with suffix .html given as output
# by ls into capital letters with suffix .HTM. Non-matching files should be omitted
# Hint 1: Check out command y
# Hint 2: You can concatenate commands as: sed 'cmd1;cmd2'

printf '%s\n' '================= SOLUTION 3.1 =================='
# Two sed commands chained with ";" in a single script:
#   1. y upper-cases every letter of the file name;
#   2. s captures everything before the (now upper-case) .HTML suffix and
#      re-emits it through the back reference \1 followed by .HTM.
# sed runs with -n, so nothing is printed by default; the trailing p flag on s
# prints only the names that were actually rewritten, which drops every
# non-HTML file as the exercise asks.
ls | sed -n 'y/abcdefghijklmnopqrstuvwxyz/ABCDEFGHIJKLMNOPQRSTUVWXYZ/;s/^\(.*\)\.HTML$/\1.HTM/p'
printf '%s\n' '================= SOLUTION 3.2 =================='
# GNU sed only: \U in the replacement upper-cases the text that follows it, so
# the case conversion happens inside the s command itself.
ls | sed -n 's/^\(.*\)\.html$/\U\1.HTM/p'
echo '================= SOLUTION 3.3 =================='
# Solution 3.1 is not completely accurate: it also prints names that were
# already upper case (FOO.HTML), because y runs on every line before s gets to
# test the suffix. Guarding the script with an address fixes that: the block
# in braces runs only for names that end in lower-case .html. The address is
# anchored with $ so that names merely containing ".html" (page.html.bak) are
# not selected.
ls | sed -n '/\.html$/{y/abcdefghijklmnopqrstuvwxyz/ABCDEFGHIJKLMNOPQRSTUVWXYZ/;s/^\(.*\)\.HTML$/\1.HTM/p;}'
echo '================= SOLUTION 3.4 =================='
# Alternatively, shorten the suffix first and upper-case afterwards. At that
# point the name is still lower case, so s must look for .html (not .HTML);
# y then capitalises the whole name and an explicit p prints it.
ls | sed -n '/\.html$/{s/\.html$/.htm/;y/abcdefghijklmnopqrstuvwxyz/ABCDEFGHIJKLMNOPQRSTUVWXYZ/;p;}'
###################################
# Exercise 4: data extraction. Use sed to extract full user names (5th field) from /etc/passwd

echo '================= SOLUTION 4.1 =================='
# Fields are separated by ":", so [^:]*: (anything that is not a colon, then a
# colon) matches one whole field. Four of them skip to the fifth field, which
# is captured and emitted with \1.
# Full names sometimes carry comma-separated extra data; only the part before
# the first comma is kept. [^:,][^:,]* requires at least one character, so
# entries with an empty name are not printed. (It is the POSIX spelling of
# "one or more"; \+ is a GNU extension.)
sed -n 's/^[^:]*:[^:]*:[^:]*:[^:]*:\([^:,][^:,]*\)[^:]*:.*$/\1/p' /etc/passwd

echo '================= SOLUTION 4.2 =================='
# Same extraction written with interval expressions: \{4\} repeats the
# "field plus colon" group four times and \{1,\} means "one or more". The
# repetition needs a group of its own, so the name moves to back reference \2.
sed -n 's/^\([^:]*:\)\{4\}\([^:,]\{1,\}\)[^:]*:.*$/\2/p' /etc/passwd
