<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0" xmlns:dc="http://purl.org/dc/elements/1.1/">
  <channel>
    <!-- Channel-level metadata for the feed: display title, site link,
         publication timestamp (expressed in GMT) and a description.
         The title and description carry the same text. -->
    <title>DZone Snippets: bash code</title>
    <link>http://snippets.dzone.com/posts</link>
    <pubDate>Fri, 08 Aug 2008 17:13:37 GMT</pubDate>
    <description>DZone Snippets: bash code</description>
    <!-- Single feed entry: a posted snippet titled
         "Real-time website referrers tracer". -->
    <item>
      <title>Real-time website referrers tracer</title>
      <link>http://snippets.dzone.com/posts/show/399</link>
      <!-- The description is entity-escaped HTML, not plain text: a one-sentence
           summary followed by a code element whose lines are separated by
           escaped br tags. The payload is a bash script that pipes new log
           lines through sed, egrep and awk. The element value spans one
           physical line break (inside the second awk substr call); that break
           is part of the character data, so do not re-wrap or pretty-print
           this element. -->
      <description>Trace your website visitors (and referrers) as they come using this bash script.&lt;br /&gt;&lt;br /&gt;&lt;code&gt;&lt;br /&gt;# CONFIGURATION&lt;br /&gt;# =============&lt;br /&gt;&lt;br /&gt;# Where your httpd log file is&lt;br /&gt;log="current-http-accesslog"&lt;br /&gt;&lt;br /&gt;# What files to exclude (request for those files won't be shown)&lt;br /&gt;exclude="\.gif|\.jpg|\.png|\.ico|\.css|\.js"&lt;br /&gt;&lt;br /&gt;# Width of request and referer columns (set it to match your terminal)&lt;br /&gt;col_width=35&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;# MAIN SCRIPT&lt;br /&gt;# ===========&lt;br /&gt;&lt;br /&gt;# Check if log file actually exists (and is readable)&lt;br /&gt;if [ ! -r "${log}" ]; then&lt;br /&gt;echo "Cannot access log file: $log"&lt;br /&gt;exit 0&lt;br /&gt;fi&lt;br /&gt;&lt;br /&gt;# After startup we will output few lines&lt;br /&gt;start=`wc -l &lt; "${log}"`&lt;br /&gt;start=$(( $start - 30 ))&lt;br /&gt;if (( ${start} &lt; 0 ))&lt;br /&gt;then start=$((0))&lt;br /&gt;fi&lt;br /&gt;&lt;br /&gt;# Main loop&lt;br /&gt;while :&lt;br /&gt;do&lt;br /&gt;  end=`wc -l &lt; "${log}"`&lt;br /&gt;  end="${end##* }"&lt;br /&gt;  if (( ${end} &gt; ${start} ))&lt;br /&gt;  then&lt;br /&gt;    start=$(( $start + 1 ))&lt;br /&gt;    sed -n "${start},${end}p" "${log}" | egrep -v "${exclude}" | awk -v col_width=$col_width '{&lt;br /&gt;&lt;br /&gt;      # we are only interested in GET/POST requests&lt;br /&gt;      if ( match($0, /"(GET|POST).*?"/) &gt; 0 )&lt;br /&gt;      {&lt;br /&gt;        split($0, fields, "\"")&lt;br /&gt;&lt;br /&gt;        # IP_ADDRESS&lt;br /&gt;        tmp = $1&lt;br /&gt;        while ( length(tmp) &lt; 15 ) tmp = tmp " "&lt;br /&gt;        printf "%s", tmp " "&lt;br /&gt;    &lt;br /&gt;        # HTTP_REQUEST (GET/POST)&lt;br /&gt;        tmp = substr(fields[2], 0, index(fields[2], "HTTP/") - 1 )&lt;br /&gt;        tmp = substr(tmp, index(tmp, " ") + 1, 
col_width)&lt;br /&gt;        while ( length(tmp) &lt; col_width ) tmp = tmp " "&lt;br /&gt;        printf "%s", tmp " "&lt;br /&gt;    &lt;br /&gt;        # REFERER (the juice)&lt;br /&gt;        tmp = fields[4]&lt;br /&gt;        while ( length(tmp) &lt; col_width ) tmp = tmp " "&lt;br /&gt;        printf "%s", tmp " "&lt;br /&gt;    &lt;br /&gt;        # USER_AGENT&lt;br /&gt;        printf "%s", fields[6]&lt;br /&gt;    &lt;br /&gt;        # new line at the end&lt;br /&gt;        printf "\n"&lt;br /&gt;      }&lt;br /&gt;    }'&lt;br /&gt;&lt;br /&gt;    start=${end}&lt;br /&gt;  fi&lt;br /&gt;&lt;br /&gt;  # this is an endless loop that sleeps every second&lt;br /&gt;  sleep 1&lt;br /&gt;done&lt;br /&gt;&lt;/code&gt;&lt;br /&gt;</description>
      <pubDate>Sun, 19 Jun 2005 22:32:03 GMT</pubDate>
      <!-- The guid value is the same URL as the link element above. -->
      <guid>http://snippets.dzone.com/posts/show/399</guid>
      <!-- NOTE(review): the value is a username followed by a display name in
           parentheses, with no e-mail address. RSS 2.0 describes author as an
           e-mail address, and the dc prefix declared on the root element is
           otherwise unused. Confirm whether consumers rely on this form before
           changing it. -->
      <author>paulgoscicki (Paul Goscicki)</author>
    </item>
  </channel>
</rss>
