Auto download pdf from www.estado.com.br
// Note: you must subscribe to this newspaper in order to download the PDFs.
// You must have gawk installed, and wget available at ./wget/wget (the path the script invokes).
#!/usr/bin/env bash
#
# Download today's edition from jpdf.estado.com.br as PDF files.
#
# Steps:
#   1. Fetch the day's PDF menu page into ./index.txt.
#   2. Extract every option value starting with "/estadopdf" from that page.
#      Each value is expected to be the path of a section's first page,
#      i.e. ".../<section>01.pdf" (same shape as the A01.pdf path in the
#      menu URL) -- verify against the live page if the site changes.
#   3. For each section, download pages 01..39 into ./estado, moving on to
#      the next section after the second failed download.
#
# Requires: bash, gawk, and a wget binary at ./wget/wget.
# Exit status: 1 if the menu page could not be retrieved, otherwise the
# status of the last command executed.

set -u

export http_proxy="http://10.1.1.1:8000"

# NOTE(review): the subscriber cookie is hard-coded; replace it with your own
# and consider moving it out of the script.
readonly cookie='sMkjwKA67H8FDcsZX5'
readonly wget_bin=./wget/wget
readonly site="http://jpdf.estado.com.br"
readonly page_limit=40   # exclusive upper bound: pages 01..39 are tried

# Options shared by every wget call. The Cookie header text (including its
# trailing space) is kept exactly as the server has been receiving it.
readonly -a wget_opts=(
  -nc -k -S -U Mozilla --proxy
  --header "Cookie: User=$cookie "
)

# Take the date with a single `date` call so day/month/year cannot disagree
# when the script runs across midnight.
IFS=/ read -r dd mm yyyy <<<"$(date +%d/%m/%Y)"

index="${site}/menupdfi.php?E=SP&D=${dd}/${mm}/${yyyy}&A=/estadopdf/sp/paginas/${yyyy}/${mm}/${dd}/A01.pdf"

# Start from a clean slate: -nc would refuse to overwrite a stale index.
rm -f index.txt
"$wget_bin" "${wget_opts[@]}" -O index.txt "$index"

# wget -O creates the output file even when the download fails, so test for
# a non-empty file rather than mere existence.
if [[ ! -s index.txt ]]; then
  echo "Could not download the index page: $index" >&2
  exit 1
fi

# Second double-quote-delimited field of each matching <option> line.
sections=$(gawk 'BEGIN {FS="\""} /option VALUE="\/estadopdf/ { print $2 }' index.txt)

# Word splitting of $sections is intentional: one path per whitespace-
# separated word.
for first_page in $sections; do
  prefix=${first_page%01.pdf}   # path of the section without the page number
  section=${prefix##*/}         # last path component, e.g. "A", "Cl", "Q"

  # Skip the classifieds.
  if [[ "$section" == "Cl" ]]; then continue; fi
  # Skip the Guide ("Guia") supplement.
  if [[ "$section" == "Q" ]]; then continue; fi

  base="${site}${prefix}"
  failures=0

  for ((page = 1; page < page_limit; page++)); do
    printf -v url '%s%02d.pdf' "$base" "$page"

    echo "=================================================================="
    echo "$url"
    echo "=================================================================="

    # Any non-zero status counts as a failure: current wget releases report
    # server errors (such as a missing page) with codes other than 1.
    if ! "$wget_bin" -P estado "${wget_opts[@]}" "$url"; then
      failures=$((failures + 1))
      if ((failures > 1)); then
        echo "Proximo caderno..."
        break
      fi
    fi

    # Be polite to the server between requests.
    sleep 1
  done
done