#!/bin/bash
#
#############################################################################

NAME_="urlext"
HTML_="extract url link"
PURPOSE_="extract url addresses from file or stdin"
SYNOPSIS_="$NAME_ [-hl] [-t file...] [file...]"
REQUIRES_="standard GNU commands"
VERSION_="1.2"
DATE_="2002-07-16; last update: 2004-05-21"
AUTHOR_="Dawid Michalczyk "
URL_="www.comp.eonworks.com"
CATEGORY_="www"
PLATFORM_="Linux"
SHELL_="bash"
DISTRIBUTE_="yes"

#############################################################################
# This program is distributed under the terms of the GNU General Public License
#
# Reads the named files (or stdin when no file is given), breaks the text into
# tokens at the characters < > " ' and space, and prints every token that
# contains "http:" or "https:" (case-insensitive) with any "href=" / "src="
# text removed. The shebang names bash because the script relies on bash-only
# syntax ([[ ]], local).

#######################################
# Print the help text to stderr and terminate the script.
# Globals:   NAME_, VERSION_, PURPOSE_, SYNOPSIS_, REQUIRES_ (read)
# Outputs:   help text on stderr
# Returns:   never returns; exits with status 1
#######################################
usage() {
  cat >&2 <<EOF
$NAME_ $VERSION_ - $PURPOSE_
Usage: $SYNOPSIS_
Requires: $REQUIRES_
Options:
     -t file, extract urls from file and convert to html links url
     -h, usage and options (this help)
     -l, see this script
EOF
  exit 1
}

#######################################
# Filter: turn markup on stdin into one candidate URL per line on stdout.
# Every < > " ' and space becomes a newline, "href=" and "src=" are deleted
# (any letter case), and only lines containing http: or https: are kept.
# Needs GNU sed: the I flag and \? are GNU extensions.
# Outputs:   matching lines on stdout
# Returns:   exit status of sed
#######################################
extract_urls() {
  # \47 is the octal code of the single quote; [\n*] pads set2 with newlines
  # so that every character of set1 maps to a newline.
  tr '<>"\47 ' '[\n*]' \
    | sed -n -e 's/href=//gI' -e 's/src=//gI' -e '/https\?:/Ip'
}

#######################################
# Filter: wrap the URL list read from stdin in the -t report layout:
# a heading line, each URL followed by an empty line, and a final empty line.
# NOTE(review): the help text advertises HTML links for -t, yet the code
# visible here only emits plain text; markup may have been lost from this
# copy of the file - confirm against the upstream script.
# Arguments: $1 - description of the input sources, shown in the heading
# Outputs:   report on stdout
#######################################
format_report() {
  local line
  printf 'URLs extracted from: %s\n' "$1"
  # -r keeps backslashes that occur inside a URL intact.
  while read -r line; do
    printf '%s\n\n' "$line"
  done
  printf '\n'
}

#######################################
# Entry point: handle the single leading option, then run the filter.
# Arguments: the script's command line ([-h|-l|-t] [file...])
# Outputs:   extracted URLs (or the -t report) on stdout
#######################################
main() {
  local tags=

  # Only the first argument is inspected; anything else is handed to cat.
  case "${1-}" in
    -t) tags=on; shift ;;
    -h) usage ;;
    -l) more "$0"; exit 1 ;;
    *) tags=off ;;
  esac

  # cat with no operands reads stdin, so the script also works as a filter.
  # Data is streamed through the pipeline, so no temp file is needed.
  if [[ "$tags" == on ]]; then
    cat "$@" | extract_urls | format_report "$*"
  else
    cat "$@" | extract_urls
  fi
}

# Leave with status 1 on HUP, INT, QUIT and TERM.
trap 'exit 1' 1 2 3 15

main "$@"