#!/usr/bin/env bash
#
# Downloads today's edition from jpdf.estado.com.br as PDF files, one file per
# page, into the ./estado directory.
#
# Note: you must be subscribed to this newspaper in order to download the PDFs.
# Requirements: a wget binary at ./wget/wget, plus gawk.
#
# Flow:
#   1. Fetch the index page for today's date into index.txt.
#   2. Extract the first-page PDF path of every section listed in the index.
#   3. For each section (except the skipped ones), request pages 01..39 in
#      order and move on to the next section after the second failed download.

set -u

# Proxy picked up by wget through the environment.
export http_proxy="http://10.1.1.1:8000"

# Value sent as the "User" cookie on every request.
# NOTE(review): hard-coded credential; consider reading it from the environment.
cookie='sMkjwKA67H8FDcsZX5'

# Take day, month and year from a single `date` call so the three values cannot
# disagree if the script happens to run across midnight.
read -r dd mm yyyy <<<"$(date '+%d %m %Y')"

index="http://jpdf.estado.com.br/menupdfi.php?E=SP&D=$dd/$mm/$yyyy&A=/estadopdf/sp/paginas/$yyyy/$mm/$dd/A01.pdf"

wget_bin=./wget/wget
# Options shared by every wget call; kept in an array so the header (which
# contains spaces) stays a single argument.
wget_opts=(-nc -k -S -U Mozilla --proxy --header "Cookie: User=$cookie ")

if [[ ! -x "$wget_bin" ]]; then
  echo "wget not found at $wget_bin" >&2
  exit 1
fi

# -f: a missing index.txt from a previous run is not an error.
rm -f index.txt
"$wget_bin" "${wget_opts[@]}" -O index.txt "$index"

# wget -O creates the output file even when the download fails, so check for a
# non-empty file rather than mere existence.
if [[ ! -s index.txt ]]; then
  echo "could not download the index page" >&2
  exit 1
fi

# Each matching <option VALUE="/estadopdf/...01.pdf"> line yields the path of a
# section's first page (the second field when splitting on double quotes).
# The list is read on fd 3 so commands inside the loop keep their own stdin.
while IFS= read -r -u 3 first_page; do
  # Path without the trailing "01.pdf", e.g. ".../A" for ".../A01.pdf".
  prefix=${first_page%01.pdf}
  # Section identifier is the last path component of that prefix.
  section=${prefix##*/}

  case "$section" in
    Cl) continue ;; # Skip the classifieds.
    Q) continue ;;  # Skip the "Guia" section.
  esac

  base_url="http://jpdf.estado.com.br${prefix}"
  failures=0

  # Page numbers are tried from 01 up to 39.
  for ((page = 1; page < 40; page++)); do
    printf -v url '%s%02d.pdf' "$base_url" "$page"

    echo "=================================================================="
    echo "$url"
    echo "=================================================================="

    # Any non-zero status counts as a failure: current wget versions report
    # e.g. 4 (network failure) or 8 (server error response), not only 1.
    if ! "$wget_bin" -P estado "${wget_opts[@]}" "$url"; then
      failures=$((failures + 1))
      # A second failed page within the section ends it.
      if ((failures > 1)); then
        echo "Proximo caderno..."
        break
      fi
    fi

    # Pause between requests.
    sleep 1
  done
done 3< <(gawk 'BEGIN {FS="\""} /option VALUE="\/estadopdf/ { print $2 }' index.txt)