#!/bin/sh
# ~jhs/bin/.sh/web_cp_remote
#
# Purpose: mirror a list of remote web sites into /usr/backup/www with
# httrack, then stamp the directory with the date of the backup.
#
# See also web_cp_local
# Link setup: /site/domain/berklix/usr/local/www/backup/Makefile
#   /site/usr/local/www/Data/virtual/berklix.net/backup ->
#     /usr/local/www/backup
# Called from /var/jhs/crontab on both
#   www.berklix.org & user.js.berklix.net
# See also:
#   http://www.freebsd.org/cgi/cvsweb.cgi/ports/www/httrack/
#   http://www.httrack.com/
# Warning: This used 120M of swap, & gate=park ran out, it also makes gate
# slow.  So as it also runs on internal host, using gate as a proxy,
# just run it on internal host=user,
# which has the other advantage: do not need to keep both alternate
# gates up to date.
# 45 mins on a repeat run when already up to date.

# Host naming, looked up once and reused below.
domain1=$(hostname -s)            # short host name, e.g. "user"
domain2=$(hostname)               # fully qualified host name
# Strip the leading "<short>." from the FQDN with a literal prefix match
# (no regex, no extra processes).  If the FQDN has no such prefix the value
# is left unchanged.
domain=${domain2#"${domain1}".}

# All mirrors are written relative to this directory.
#   --> /usr/local/www/backup
#   remote:         ln -s ../local/www/backup /usr/backup/www
#   Only on local:  ln -s /usr/backup/www /usr/local/www/backup
if ! cd /usr/backup/www; then
  # Running from cron: report the failure by mail and give up.
  echo "web_cp_remote cd failed on ${domain1} $(date)" |
    mail -s "Cron: ${domain1}" jhs
  exit 1
fi
# echo "web_cp_remote cd succeeded on ${domain1} $(date)" |
#   mail -s "Cron: ${domain1}" jhs

ht="nice /usr/local/bin/httrack"
# ht="nice /usr/local/bin/httrack --verbose --debug-log"

# Pause before every job: the sleep is so if I hit ^C I don't have to
# manually do a load of key strokes about 6 times to finally escape the shell.
pause=6

# Options common to every httrack job (see: httrack --help).
params=""
# params="$params -A10000"      # max 10 Kbyte/sec
#   man httrack:
#     -AN maximum transfer rate in bytes/seconds
#         (1000=1KB/s max) (--max-rate[=N])
#   That manual perhaps should be "rate in K. bytes" ?
#   above (in web_cp_local) was too heavy prior to 2010.01,
#   so reduced to
params="$params -A10"           # max 10 Kbyte/sec
params="$params -a"             # = --stay-on-same-address
params="$params -d"             # = --stay-on-same-domain
# params="$params --quiet"
# params="$params -v"           # for debug
params="$params -w"             # ? Insert "Mirrored from..."
params="$params --update"       # To reduce traffic
params="$params -X"             # purge old files
# params="$params -s0"          # ignore robots.txt

# Per-site tuning: byte limit per job, and whether a proxy is needed.
case "$domain" in
  berklix.org)
    # Max bytes per job.  Less, to be gentle on server sites.
    params="$params -M10000000"
    ;;
  js.berklix.net)
    # Allow more data as I can load my flat rate DSL.
    # www.uk.freebsd.org
    # More than 100000000 bytes have been transfered.. giving up) - OK
    #           100,000,000
    params="$params -M1000000000"
    case "$domain1" in
      mart | park)
        echo "Proxy not needed on gateway, but you may need lots of swap"
        ;;
      *)
        params="$params -P gate:80"
        echo "Proxy is set to gate:80"
        ;;
    esac
    ;;
esac

# mirror HTTRACK_ARGS...
#   Wait $pause seconds, then run httrack with the shared $params followed by
#   the job-specific arguments (typically: [-Mbytes] -O output_dir url).
#   Returns httrack's exit status; callers below ignore it so that one failed
#   site does not stop the remaining jobs.
mirror() {
  sleep "$pause"
  # $ht and $params are intentionally unquoted: each holds a
  # whitespace-separated word list that must be split into arguments.
  # shellcheck disable=SC2086
  $ht $params "$@"
}

if [ "$domain" = berklix.org ]; then    # { Remote servers.
  # du -s -k @ 2009.05:   794602  BAFUG
  # ----------
  # For Phillips domain lists see also:
  #   /site//site/usr/local/www/Data/virtual/berklix.net/index.lmth
  #   /site/domain/berklix/usr/local/www/backup/index.lmth
  #   ~jhs/bin/.sh/web_cp_remote
  # A job-specific -M follows the global one set above
  # (presumably the later value wins -- verify against httrack).
  mirror -M2000000   -O a1med.co.uk             http://www.a1med.co.uk             # 1686
  mirror -M2000000   -O a1med.net               http://www.a1med.net               # 1666
  mirror -M120000000 -O cyberknifeservice.com   http://www.cyberknifeservice.com   # 6742
  mirror -M70000000  -O mediluxhealth.net       http://www.mediluxhealth.net       # 46156
  mirror             -O mediluxhealthcare.in    http://mediluxhealthcare.in        # ?
  # 2010.01 http://mediluxhealth.net/mhl_web_sites.html lists sites that
  # could be backed up/mirrored.
  mirror -M70000000  -O mediluxprofessional.net http://mediluxprofessional.net     # 59264
  mirror -M120000000 -O mhlclinics.com          http://www.mhlclinics.com          # 11522
  mirror -M120000000 -O mhldialysis.com         http://www.mhldialysis.com         # 1946
  mirror -M90000000  -O ppmconsult.co.uk        http://www.ppmconsult.co.uk        # 52452
  mirror -M30000000  -O surfacevision.com       http://user.surfacevision.com      # 14014
  mirror -M9000000   -O nostradamus-dimde.de    http://www.nostradamus-dimde.de    # 5360
  # -----------
fi # }
# NOTE(review): no mirror jobs are listed here for js.berklix.net; on that
# domain this script only prints the proxy message and writes the stamp file.

# Record when, and by which script, this backup tree was last refreshed.
{
  date
  echo "Built by: http://www.berklix.com/~jhs/bin/.sh/$(basename "$0")"
} > Date_Of_Backup

# echo "web_cp_remote finished on ${domain1} $(date)" |
#   mail -s "Cron: ${domain1}" jhs
exit 0

# Other HTML site copying tools apart from httrack:
#   pavuk   < ernst
#   spider  < ernst
#   webcopy
#   wget    < gary