# WEBSITE PARSING PROGRAM
#
# NOTE: ALL CONTENT ON CREATED WEBPAGE IS OWNED BY THE CORRESPONDING
# WEBSITES. CLICK ON LINK TO SEE ORIGINAL CONTENT
#
#Created by : Justin Bischoff
#
#Latest Revision : 6/24/03
# -Added dynamic QOTD
# -improved html layout
#
#DO NOT USE OR DISTRIBUTE THIS CODE
#
#There is no warranty, explicit or implied, on this code.
#
#Copyright 2003 Justin Bischoff
#
#Send me ideas for changes or improvements
#
#webpage content is owned by corresponding webpage owner,
# see individual pages for details.
#
#publicly providing the content of the generated web page
# without permission of the content owners could be
# breaking the law
#############################################
# Description
#
# This is a perl script that uses the LWP module to
# grab specific webpages off of the internet.
#
# The second half of the script goes through the grabbed
# webpages and turns them into a summary page
#############################################
# Usage
# - use perl to run. C:\perl l33t_grabber.pl
#
# Command Line Parameters - examples (in any order)
# "force 3" - forces up to 3 attempts at a page
# "proxy my.proxy:69" - uses my.proxy at port 69
# "timeout 12" - sets the timeout to 12 seconds
# "comicsoff" - only news sites grabbed - much faster
#
# ie. "C:\perl l33t_grabber.pl proxy proxy.fm.intel.com:911 force 2 comicsoff"
# this line will use folsom's proxy, make two attempts at a webpage and it
# has the comics turned off.
#I hate use strict
use LWP::UserAgent;
use POSIX qw/strftime/;
###############################
#things to add:
# - Save a text file w/date of source html, only look through web if file dne
# - Add automatic image grabber - yahoo most popular? - image of the day
# - add current events page
# - add more comics
# - handle more failure cases, or handle them better.
#
# - automatically spawn the webpage after the script is run. (Command line parameter?)
# / more command line parameters?
# - more stock?, weather, e2, circuit?
# - Current Movie Listings or Showtimes?
# - Current Events - concerts, shows, plays, lectures, etc.
#
# - http://www.weather.com/weather/local/USCA0967?x=0&lswe=Sacramento&lswa=WeatherLocalUndeclared&whatprefs=&y=0
#
# - Horoscope? (theOnion?)
###########################################################
#USER VARIABLES - change these per personal preference #
#
#for news sites, this is the number of headlines per #
# category to display. this only kinda works, #
$Number_of_headlines = 3; #
#
#What is the html result title going to be? #
$Page_title = "Nerd's Web Summary"; #
#
#What is the filename going to be? #
$Output_file = "nerd_summary.html"; #
#
#width of news columns must be forced #
$column_width = 33; #
#
#name and port of your proxy (can use command line instead)
#my $my_proxy = 'http://myproxy.domain.com:911'; #
my $my_proxy = 'http://myproxy2.domain.com:911'; #
#
#
#timeout before we quit trying to load a page. #
$timeout = 13; #in LWP timeouts don't really work #
#
$logfiles = 0;
# #
###########################################################
#ONLY 1337 P3rl Ninjas venture below this line!
#get time in "Month-Day-Year"
$now = strftime( "%B-%d-%Y", localtime(time()) );
#streamline this code!
my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime(time);
#print "$mon-$mday-$year : $isdst\n";
my $daysleft = 318 - $yday;
###########################################################
# process user arguments
#
# Keywords may be given in any order:
#   logfile          - open logfile_<date>.txt on the LOGFILE handle
#   force N          - try each page up to N times (clamped to 1..5)
#   timeout N        - HTTP timeout in seconds
#   proxy HOST:PORT  - proxy to fall back to ("http://" is optional)
#   comicsoff        - do not fetch or render the comics
#
# Keywords are matched as whole words (leading dashes are tolerated).
# Prefix/substring matching used to make a proxy *value* such as
# "proxy.fm.intel.com:911" look like a second "proxy" keyword, which
# printed a bogus parse error and overwrote $proxy with the argument
# that followed it. Numeric values must be digits only.
#
#DO NOT change these default values.
$this_param = 0; $comics_on = 1; $force = 1;
$proxy = "none";
foreach $cl_param (@ARGV) {
    # Value belonging to a "keyword value" pair; '' when the keyword is
    # the last argument, so the checks below never touch undef.
    my $next_param = defined($ARGV[$this_param+1]) ? $ARGV[$this_param+1] : '';

    #logfile - enables logfiling daily- creates lots of files.
    if($cl_param =~ /^-*logfile$/){
        $logfiles = 1;
        open(LOGFILE, '>', "logfile_$now.txt") || die "Couldn't open logfile_$now.txt\n" . $!;
        &print_error( "HTML PARSER LOGFILE\n\nDate: $now\n\n");
    }

    #force - number of attempts per website, e.g. "force 2".
    if($cl_param =~ /^-*force$/){
        if($next_param =~ /^\d+$/){
            $force = $next_param;
            if($force>5){
                &print_error( "FOOL! Do you want the script to run forever?\n");
                &print_error( "I AM COMPUTER, SMARTER THAN THOU, USE A LOWER FORCE VALUE!\n");
                $force = 5;
            }
            elsif($force<=0){
                &print_error( "WHAT ARE YOU TRYING TO DO?\nBAD USER, BAD!\n");
                $force = 1;
            }
            &print_error( "Using a force of $force cycles\n");
        }
        else{
            $force=1;
            &print_error( "ERROR PARSING COMMAND LINE PARAMETERS!\n");
            &print_error( "\"force\" must be followed by a number.\nForce not used\n");
        }
    }

    #timeout - sets timeout in number of seconds, e.g. "timeout 10"
    if($cl_param =~ /^-*timeout$/){
        if($next_param =~ /^\d+$/){
            $timeout = $next_param;
            &print_error( "Using a timeout of $timeout\n");
        }
        else{
            &print_error( "ERROR PARSING COMMAND LINE PARAMETERS!\n");
            &print_error( "\"timeout\" must be followed by a number.\nTimeout of $timeout sec. used\n");
        }
    }

    #proxy - defines your proxy. It must be followed by a name, a colon
    # and a port number, e.g. "proxy.fm.intel.com:911".
    if($cl_param =~ /^-*proxy$/){
        $proxy = $next_param;
        if($proxy =~ /\w.*?\:\d+/){
            # Add the scheme only when the user left it off.
            if($proxy =~ /^http\:\/\// ){
                $my_proxy = $proxy;
            }
            else{
                $my_proxy = "http://".$proxy;
            }
        }
        else{
            &print_error( "ERROR PARSING COMMAND LINE PARAMETERS!\n");
            &print_error( "\"proxy\" must be followed by a valid proxy.\nProxy $my_proxy used\n");
        }
    }

    #comicsoff - script will grab only news pages, saves you time for the script
    # running, and time for the webpage to load up.
    if($cl_param =~ /^-*comicsoff$/){
        $comics_on=0;
        &print_error( "COMICS WILL BE OFF!\n");
    }

    $this_param++;
}################ FINISHED PROCESSING COMMAND LINE PARAMETERS #####################
#can try Yahoo RSS http://rss.news.yahoo.com/rss/topstories
#Reuters Photos match "class="photoLink"" at http://www.reuters.com/newsPhotoGallery.jhtml?type=topNews
#URLs to get - dont change these unless target url changes
#
# The array index is significant: the fetch loop stores each page in
# @all_data under the same index, and every parser below reads its page
# by that number (0 Google, 1 Slashdot, 2 Inquirer, 3-6 comics, 7 stock,
# 8 SacBee, 9 Kirkwood, 10 Ace's Hardware, 12 word of the day, 13 quote
# of the day, 14 Mondavi, 17 weather, 18 Yahoo images, 19 Reuters).
# Renumbering an entry means updating the matching parser too.
$url[0]="http://news.google.com/news/gnmainlite.html";
$url[1]="http://slashdot.org/slashdot.xml";
#http://www.theinquirer.net/inquirer.rss
$url[2]="http://www.theinquirer.net/index.html";
# Comics occupy slots 3-6. With "comicsoff" these slots are left
# undefined (later slots are still assigned), and the fetch loop skips
# the HTTP request for page numbers 3..6.
if($comics_on){
$url[3]="http://www.dilbert.com/comics/dilbert/index.html";
$url[4]="http://www.penny-arcade.com/view.php3";
$url[5]="http://pvponline.com/";
$url[6]="http://www.dieselsweeties.com/";
#$url[17]="http://www.megatokyo.com/";
#$url[]="http://www.littlegamers.com/";
}
$url[7]="http://finance.yahoo.com/q?s=intc&d=v1";
#$url[8]="http://my.yahoo.com/?myHome";
$url[8]="http://www.sacbee.com/content/news/";
$url[9]="http://www.kirkwood.com/conditions.asp";
$url[10]="http://www.aceshardware.com/";
# NOTE: slot 11 is assigned twice; the second assignment wins, so only
# edition.cnn.com is actually fetched.
$url[11]="http://www.cnn.com/";
$url[11]="http://edition.cnn.com/";
$url[12]="http://dictionary.reference.com/wordoftheday/";
$url[13]="http://www.qotd.org/";
#UPCOMING EVENTS
$url[14]="http://www.mondaviarts.org/index.lasso";
$url[15]="http://www.berkeley.edu/calendar/";
$url[16]="http://sfgate.com/eguide/epicks/";
#$url[17]="http://www.weather.com/weather/print/95816";
#sacramento
#$url[17]="http://wwwa.accuweather.com/adcbin/public/local_index.asp?zipcode=95816&partner=accuweather";
#kirkwood
$url[17]="http://wwwa.accuweather.com/adcbin/public/local_index.asp?zipcode=95646&partner=accuweather";
$url[18]="http://news.yahoo.com/news?tmpl=index2&cid=716";
$url[19]="http://www.reuters.com/newsPhotoGallery.jhtml?type=topNews";
#create a UserAgent
# LWP::UserAgent->new takes key => value options; a lone scalar is not a
# valid way to name the agent, so pass it as the "agent" option.
my $agent_name = "PPMOD-Agent/LWP";
my $ua = LWP::UserAgent->new(agent => $agent_name);
#set temporary timeout for test
$ua->timeout(3);
# The User-Agent header actually sent is this browser-like string; it
# replaces the name given to the constructor above.
$ua->agent('MSIE/6.0');
# TODO: skip the probe below when a proxy was given on the command line,
# i.e. when $proxy ne "none" (string comparison - "==" would compare
# numerically and treat every non-numeric string as equal).
#test connection to see if proxy is in place.
&print_error( "Testing connection...\n");
my $test_request = HTTP::Request->new(GET => "http://perl.com");
my $test_response = $ua->request($test_request);
# Kept for anything that still inspects the raw probe response.
@test_page = split(/\n/,$test_response->as_string);
#
#set real timeout value
$ua->timeout($timeout);
#do we need to use the proxy?
# Ask the response object instead of string-matching the status line: the
# old test for the literal "HTTP/1.1 200 OK" switched the proxy on whenever
# a direct request succeeded with any other protocol version or 2xx code.
if(!$test_response->is_success){
    $ua->proxy(['http'], $my_proxy);
    &print_error( "\nUsing proxy $my_proxy\n");
}
else{
    &print_error( "\nNot using a proxy\n");
}
# Fetch every page in @url and store it, split into lines, in
# $all_data[N] where N is the page's index in @url. A page that could not
# be fetched gets a three line HTML placeholder (containing the marker
# "(Internal Server Error)" that the parsers look for) at the top of its
# data.
#initialize value
$current_page=0;
#loop through each url in above list
foreach $webpage (@url) {
    # Slots 3-6 are the comics. With "comicsoff" they are undefined and
    # must not be requested; give them an empty page instead of a stale
    # copy of whatever page was fetched before them.
    if( !defined($webpage) || !($comics_on || $current_page<3 || $current_page>6) ){
        $all_data[$current_page] = [];
        $current_page++;
        next;
    }

    &print_error( "Requesting $webpage...\n");
    @this_page = ();

    #Continue to try if http request fails until we have tried $force number of times.
    for ($f_count=0;$f_count < $force ;$f_count++) {
        my $request = HTTP::Request->new(GET => $webpage);
        $request->header(Accept => 'text/html');

        #get each page and split it into an array.
        my $response = $ua->request($request);
        @this_page = split(/\n/,$response->as_string);

        # Decide success from the response object rather than from the
        # exact wording of the status line, which differs between LWP
        # versions and only ever covered 500 and 404.
        if( $response->is_success ){
            &print_error( "Data received successfully.\n");
            last;
        }

        #page not found / not reachable.
        &print_error( "ERROR! Could not connect to : $webpage\n");
        # Overwrite the first three lines with the placeholder; any
        # remaining lines of the error body are kept, as before.
        $this_page[0] = "<br><small>(Internal Server Error)<br>\n";
        $this_page[1] = "Error connecting to webpage.<br>\n";
        $this_page[2] = "<a href=\"$webpage\"> Click Here to visit your webpage."
                      . "</a></small><br>\n";
        if($f_count<$force-1){
            &print_error( "Trying again...\n");
        }
    }# for loop forcing re-reads

    #create an array of these references for each html page.
    $all_data[$current_page] = [ @this_page ];
    $current_page++;
}
######## DONE GETTING HTML #################
# Create Webpage Summary Now, parse pages.
#
# If you don't know regexps, this code will be gibberish.
&print_error( "\nCreating output file...\n");
#create output file
open(NEW, '>', $Output_file) || die "Couldn't open $Output_file: $!\n";
#always start html file with this junk.
# Title carries the date plus a 12-hour clock time. Hours 0 and 12 both
# display as "12", noon onwards is "pm", and minutes are zero-padded
# (previously noon showed as "12:..am", midnight as "0:..am" and five
# past the hour as ":5").
$american_time = $hour % 12;
$american_time = 12 if $american_time == 0;
my $clock_time = sprintf("%d:%02d%s", $american_time, $min, ($hour >= 12) ? "pm" : "am");
print NEW "<HTML>\n<HEAD>\n<TITLE>$Page_title : $now $clock_time</TITLE>\n";
# Inline javascript: ToggleQOTD() and ToggleEvents() show/hide the
# absolutely positioned "qotd" and "UpcomingEvents" <div>s.
print NEW "<script type=\"text/javascript\">\n";
print NEW "<!--\n";
print NEW "function ToggleQOTD()\n";
print NEW "{\n";
print NEW " if(qotd.style.visibility == \"visible\"){\n";
print NEW " qotd.style.visibility = \"hidden\";\n";
print NEW " qotd.style.top -= 110;\n";
print NEW " }else{\n";
print NEW " qotd.style.visibility = \"visible\";\n";
print NEW " qotd.style.top = 110;\n";
print NEW " }\n";
print NEW "}\n\n";
print NEW "function ToggleEvents()\n";
print NEW "{\n";
print NEW " if(UpcomingEvents.style.visibility == \"visible\"){\n";
print NEW " UpcomingEvents.style.visibility = \"hidden\";\n";
print NEW " }else{\n";
print NEW " UpcomingEvents.style.visibility = \"visible\";\n";
print NEW " }\n";
print NEW "}\n";
print NEW "-->\n";
print NEW "</script>\n";
print NEW "\n</HEAD>\n<BODY>\n\n";
# Emit the nested layout tables that wrap the news columns and the top
# header, and reset the shared parser counters.
&print_error( "\nParsing results into new html...\n");
($headline_limit, $counter) = (0, 0);

# Outermost table: one cell for the news, one for the right-hand bar.
print NEW
    "<!--rightbar table, one column for news, one column for rightbar-->\n",
    "<TABLE border=\"0\" bgcolor=\"#ffffff\"><TR><TD valign=\"top\">\n\n";

# Grey one-cell table around a white one gives the news section a border.
print NEW
    " <!--Border-1 around all news and header-->\n",
    " <TABLE border=\"0\" cellpadding=\"2\"><tr><td width=\"100%\" bgcolor=\"#999999\">\n",
    " <TABLE border=\"0\" bgcolor=\"#ffffff\" cellpadding=\"2\" width=\"100%\">\n",
    " <tr><td align=\"left\" bgcolor=\"#ffffff\">\n\n";

# colspan is the number of news columns the title bar stretches across.
print NEW " <TABLE border=\"0\"><TR><TD colspan=\"4\" valign=\"top\">\n";

# Border around the header, then the header table itself.
&html_border();
print NEW
    " <TABLE border=\"0\" width=\"100%\">\n",
    " <!--Main Top Header row-->\n";
#weather - 10 day header
# Builds the forecast strip from the weather page (page 17). Output is one
# <TD> pair per day: an icon cell followed by a text cell, at most 7 days.
print NEW "\n\n<!-------- Weather SECTION -------------->\n";
print NEW " <TR><TD align=\"center\" colspan=\"9\">\n";
# $data_flag: 1 while inside the forecast part of the page.
# $counter:   number of completed day cells.
$data_flag=0; $counter=0;
#table for forecast only
print NEW "\n <TABLE border=\"0\"> <!--forecast table-->\n";
print NEW " <TR>\n";
foreach $line (@{$all_data[17]}) {
#DEBUG PRINT ENTIRE FILE
#print NEW "<!--$line-->\n";
# Pass the fetch-failure placeholder straight through to the page.
if($line =~ /\(Internal Server Error\)/){
print NEW $line;
}
# Marker text in the source page that precedes the forecast data.
if($line =~ /BEGIN FIRST 7/){
$data_flag=1;
}
# Stop collecting once seven days have been written.
if($counter>=7){
$data_flag=0;
}
#Look for images
# Icon lines are recognised by "common/i" in the line.
if (($line =~ /common\/i/) && ($data_flag==1)){
print NEW " <TD align=\"right\">\n";
# Strip font, bold and anchor tags so only the image tag is left.
$line =~ s/\<\/?font.*?\>//g;
$line =~ s/\<\/?b\>//g;
$line =~ s/\<\/?a.*?\>//gi;
#make picture smaller
# Rewrites every quoted "31" to "24".
$line =~ s/\"31\"/\"24\"/g;
#remove leading whitespace
$line =~ s/^\s+//;
# Icon links back to the weather page; the text cell for the same day is
# opened right after it.
print NEW " <a href=\"$url[17]\" style=\"text-decoration:none\">" . $line;
print NEW "</a>\n </TD><TD align=\"left\" width=\"55\">\n";
}
# Text lines of a day are recognised by "sevendaynew".
if (($line =~ /sevendaynew/) && ($data_flag==1)){
#remove some unwanted text
# The substitutions run in this order on purpose: dates (n/n), the first
# font..<br> run, the words High/Low, " F", then leftover markup.
$line =~ s/\s\d?\d\/\d\d?//g;
$line =~ s/\<font.*?\<br\>/\<br\>/;
$line =~ s/High//g;
$line =~ s/Low//g;
$line =~ s/ F//g;
$line =~ s/\;\s*?\//\//;
$line =~ s/\<\/?font.*?\>//g;
$line =~ s/\<\/?b\>//g;
$line =~ s/\<\/?a.*?\>//gi;
$line =~ s/^\s+//;
print NEW " <small><small><a href=\"$url[17]\" style=\"text-decoration:none\">";
print NEW "<font color=\"black\">".$line."</font></a></small></small>\n";
# A line containing "deg" ends the day: close the text cell and count it.
if($line =~ /deg/){
print NEW " </TD>\n";
$counter++;
}
}
}
$data_flag=0;
print NEW " </TD></TR></TABLE><!--end forecast table-->\n\n";
print NEW " </TD></TR>\n";
# Left header cell: Kirkwood snowfall taken from page 9. The value sits on
# the first line containing "font" that comes after a line containing
# "New Snow in the".
print NEW "\n\n<!-------- Kirkwood Snow SECTION -------------->\n",
          " <TR><TD align=\"left\">\n";
$data_flag=0;
foreach $line (@{$all_data[9]}) {
    # Fetch-failure placeholder goes out unchanged.
    print NEW $line if $line =~ /\(Internal Server Error\)/;

    if ($data_flag && $line=~/font/) {
        # Keep only the text between the first font tag and the next tag.
        $line =~ s/.*?\<font.*?\>//;
        $line =~ s/\<.*//;
        print NEW " <small>\n <a href=\"http://www.kirkwood.com/conditions.asp\">",
                  "Kirkwood</a> <a href=\"http://www.weather.com",
                  "/outlook/recreation/ski/weather/tenday/209004\">",
                  "snowfall</a>: $line\n </small>\n";
        $data_flag=0;
    }

    # Arm the flag last so the value is taken from a *following* line.
    $data_flag=1 if $line=~ /New Snow in the/;
}
$data_flag=0;

# Summer variant (disabled): show "$daysleft" days until next season.
#print NEW " <small>\n <a href=\"http://www.kirkwood.com/conditions.asp\">";
#print NEW "Days until next season</a> : ";
#print NEW "$daysleft\n </small>\n";

# Middle header cell: the page banner.
print NEW " </TD>\n",
          " <TD align=\"center\">\n",
          " <center>\n",
          " <font size=\"4\"><b>Justin's Website Parser</b></font><br>\n",
          " </center>\n",
          " </TD>\n";
# Right header cell: stock quote line lifted from page 7. The quote is the
# line containing "l>Reuters"; it is trimmed to end at the word "Reuters"
# and shown as a plain black link to the quote page.
print NEW "\n\n<!-------- INTEL STOCK SECTION -------------->\n",
          " <TD align=\"right\">\n";
foreach $line (@{$all_data[7]}) {
    # Fetch-failure placeholder goes out unchanged.
    print NEW $line if $line =~ /\(Internal Server Error\)/;

    next unless $line =~ /l\>Reuters/;

    # Drop table/size markup and leading whitespace.
    $line =~ s/\<\/?td.*?\>//g;
    $line =~ s/\<\/?big.*?\>//g;
    $line =~ s/\<\/?small.*?\>//g;
    $line =~ s/^\s+//;
    # Cut everything after "Reuters", then shrink that word.
    $line =~ s/Reuters.*/Reuters/;
    $line =~ s/Reuters/\<small\>Reuters\<\/small\>/;

    # (An earlier version also coloured the price change green/red and
    # rewrote relative links to finance.yahoo.com; that is disabled.)
    print NEW " <small><a href=\"$url[7]\" style=\"text-decoration:none\">",
              "<font color=\"black\"> $line </font></a></small><br>\n";
}
print NEW " </TD></TR>\n";
#row inside header to display WOTD
# Word of the day from page 12. Two lines are used: the line containing
# "span style" (the word itself, rendered bold) and the line following the
# "<!-- WOTD" marker, which is printed as-is. The second line also closes
# the markup opened by the first.
print NEW "\n\n<!-------- Word of the Day SECTION -------------->\n";
print NEW " <TR><TD colspan=\"3\">\n";
################## WORD OF THE DAY
$getNextLine=0;
foreach $line (@{$all_data[12]}) {
    #handle html failures. - don't want this for WOTD!
    #if($line =~ /\(Internal Server Error\)/){
    # print NEW $line;
    #}
    if($line=~/span style/){
        #add <bold> tags
        $line =~ s/\<span.*?\>/\<b\>/;
        $line =~ s/\<\/span.*?\>/\<\/b\>/;
        #remove all span tags
        $line =~ s/\<\/?span.*?\>//g;
        #remove all br tags
        $line =~ s/\<\/?br.*?\>//g;
        #remove excessive whitespace
        $line =~ s/\s+/ /g;
        print NEW " <center>\n <small>\n <a href=\"$url[12]\" style=\"text-decoration:none\">";
        print NEW "<font color=\"black\">\n $line";
    }
    if ($getNextLine==1) {
        # "</small>", not "<\small>": inside double quotes "\s" is just "s",
        # so the old string emitted a second opening <small> tag and the
        # element was never closed.
        print NEW " $line\n </font></a>\n </small>\n </center>\n";
        $getNextLine=0;
    }
    # Checked last so that the *next* line is the one that gets printed.
    if ($line =~ /\<\!\-\- WOTD/) {
        $getNextLine=1;
    }
}
################## end WORD OF THE DAY
print NEW " </TD>\n </TR>\n</TABLE>\n";
#end WOTD Row
#right column might go here
print NEW " </TD>\n </TR>\n ";
&end_border();
# First news column: Google News (page 0). Each fetched line is split on
# <br>; a fragment is either a category heading (printed bare) or a
# headline link (printed while the category still has budget left).
print NEW "\n\n<!-------- GOOGLE NEWS SECTION -------------->\n";
print NEW " <TR>\n <TD valign=\"top\">\n";
#GOOGLE NEWS PARSER:
&html_border();
print NEW "<A STYLE=\"text-decoration:none\" href=\"http://news.google.com/\">
<FONT size=\"5\" color=\"red\"><b>Google News</b></FONT></A><BR>\n";
&end_border();
$errorOccured=0;
foreach $line (@{$all_data[0]}) {
@temp = split(/\<br\>/,$line);
foreach $line2 (@temp) {
&html_error($line2);
#print NEW "<!--$line2-->\n";
#get categories
# A category heading is a bold label starting with one of these words.
if( ($line2 =~ /\<b\>Top/) || ($line2 =~ /\<b\>World/) ||
($line2 =~ /\<b\>U\.S/) || ($line2 =~ /\<b\>Business/) ||
($line2 =~ /\<b\>Sci/) || ($line2 =~ /\<b\>Sports/) ||
($line2 =~ /\<b\>Enter/) || ($line2 =~ /\<b\>Health/)){
#$line2=~ /\<b\>([a-zA-Z ]+)\<\/b\>/;
#$result = $1;
#print $1."\n";
#remove HTML tags.
$line2 =~ s/\<\/?IMG.*?\>//g;
$line2 =~ s/\<\/?p.*?\>//g;
#remove table tags
$line2 =~ s/\<\/?tr.*?\>//g;
$line2 =~ s/\<\/?td.*?\>//g;
$line2 =~ s/\<\/?table.*?\>//g;
#remove links
$line2 =~ s/\<a.*?\/a\>//g;
#remove extra characters
$line2 =~ s/\|//g;
#remove whitespace
# NOTE: this removes ALL whitespace, including spaces inside the label;
# the "fontsize" pattern just below relies on that.
$line2 =~ s/\s+//g;
#Remove JUMP TO feature
$line2 =~ s/\<fontsize\=\-2.*//;
#dont print </small> first time
if($headline_limit >= 1){
print NEW "</small>\n";
}
print NEW $line2, "<BR>\n";
print NEW "<small>\n";
# Budget for this category: headlines are printed while the limit is > 1,
# so this allows $Number_of_headlines of them.
$headline_limit = $Number_of_headlines + 1;
}
#get headlines and put into file
# Headline fragments are recognised by "class=y".
if(($line2 =~ /.*class\=y/) && ($headline_limit > 1)){
#remove table tags
$line2 =~ s/\<\/?tr.*?\>//g;
$line2 =~ s/\<\/?td.*?\>//g;
$line2 =~ s/\<\/?table.*?\>//g;
#/url?ntc=0L0A0&q=
# Strip the redirect prefix so the link points at the article itself.
$line2 =~ s/\/url\?.*?q\=//g;
#$line2 =~ s/\<.*?\>//; #removes html
$line2 =~ s/class\=.//;
#add a linebreak after a <br> tag
$line2 =~ s/\<BR\>/\<BR\>\n/g;
#shorten links that are too long
$line2 = &shorten_line($line2);
print NEW "$line2<br>\n";
&print_error( $line2);
$headline_limit--;
}
}
}
# Close this column and open the next one.
print NEW " </TD>\n <TD valign=\"top\">\n";
#END GOOGLE NEWS PARSER:
# Second news column, first part: Slashdot headlines from the XML feed
# (page 1). A <title> line is remembered (truncated for display) and the
# following <url> line turns it into a link; up to eight stories are shown.
($headline_limit, $counter, $title) = (0, 0, "");
print NEW "\n\n<!-------- SLASHDOT NEWS SECTION -------------->\n";
&html_border();
print NEW "<A STYLE=\"text-decoration:none\" href=\"http://slashdot.org/\"><FONT size=\"5\" color=\"green\">
<b>Slashdot</b></FONT></A>\n";
&end_border();
print NEW "<P>\n<small>\n";
$errorOccured=0;
foreach $line (@{$all_data[1]}) {
    &html_error($line);

    # The feed refuses clients it has banned; say so on the page.
    if ($line =~ /Your Headline Reader Has Been Banned/) {
        print NEW "<a href=\"http://www.slashdot.org\">Slashdot</a> is being a little b*!@#.<br>\n",
                  "Whoever wrote their crummy xml/rss<br> server ",
                  "needs a 133tness class<br> from yours truly.";
    }

    # Collapse doubly escaped ampersands coming from the XML.
    $line =~ s/amp\;amp\;/amp\;/g;

    # Story title: keep the full text for the tooltip, a clipped copy for
    # the visible link.
    if ($counter<=7 && $line =~ /\<title/) {
        $line =~ s/\<.*?title\>//g;
        $line =~ s/^\s+//g;
        $title = $line;
        $original_title = $title;
        if (length($title) > $column_width+3) {
            $title = substr($title,0,$column_width) . "...";
        }
    }

    # Story url: emit the link using the title remembered above.
    if ($counter<=7 && $line =~ /\<url/) {
        $line =~ s/\<.*?url>//g;
        $line =~ s/\s+//g;
        print NEW "<a title=\"$original_title\" href=\"$line\">$title</a><BR>\n";
        &print_error( "<a title=\"$original_title\" href=\"$line\">$title</a><BR>\n");
        $counter++;
    }
}
print NEW "</small>\n</P>\n";
# Second news column, second part: The Inquirer headlines (page 2). Once
# the "td_mainbody" marker has been seen, every line containing
# div class="ht" is stripped of markup, split into one chunk per story and
# the first eight chunks (minus copyright/advert chunks) are printed.
print NEW "\n\n<!-------- INQUIRER NEWS SECTION -------------->\n";
$valid_data = 0;
#INQUIRER NEWS PARSER:
&html_border();
print NEW "<A STYLE=\"text-decoration:none\" href=\"http://www.theinquirer.net/\"><FONT size=\"5\" color=\"purple\">
<b>The Inquirer</b></FONT></A>\n";
&end_border();
$errorOccured=0;
foreach $line (@{$all_data[2]}) {
    &html_error($line);
    #parse HTML for headlines
    if($line =~ /td_mainbody/){
        $valid_data=1;
    }
    if(($valid_data) && ($line =~ /div class\=\"ht\"/)){
        #remove all the following HTML tags.
        $line =~ s/\<\/?td.*?\>//g;
        $line =~ s/\<\/?tr.*?\>//g;
        $line =~ s/\<\/?img.*?\>//g;
        $line =~ s/\<\/?b.*?\>//g;
        $line =~ s/\<\/?embed.*?\>//g;
        $line =~ s/\<\/?hr.*?\>//g;
        $line =~ s/\<\/?s.*?\>//g;
        #replace everything after a link with a <br>tag.
        $line =~ s/\/a\>/\/a\>\<br\>/g;
        #insert theinquirer.net into a link url
        $line =~ s/\"\?/\"http\:\/\/www\.theinquirer\.net\/\?/g;

        # Open the paragraph for this batch of headlines. (No leading
        # </small> any more: nothing is open at this point.)
        print NEW "<P>\n<small>\n";

        @temp = split(/\<\/div\>\<\/div\>\<\/div\>/,$line);
        $count = 0;
        my $headlines_printed = 0;
        foreach $newline (@temp) {
            if(($count<8) && !($newline=~/2002 Breakthrough Publishing Ltd/) && !($newline=~/Advert/)) {
                $newline =~ s/\<\/?div.*?\>//g;
                #remove font tags and everything inside them
                $newline =~ s/\<font.*?\/font\>//g;
                $newline =~ s/\s+$//;
                $newline =~ s/\<br\>.*/\<br\>\n/;
                #remove everything between <a></a> links
                $newline =~ s/\<\/a.*?\>.*?\<a/\<\/a\>\<br\>\n\<a/g;
                #shorten links that are too long
                $newline = &shorten_line($newline);
                #remove leading whitespace
                $newline =~ s/^\s+//;
                print NEW $newline;
                &print_error( $newline);
                $headlines_printed++ if $newline =~ /\w/;
            }
            $count++;
        }

        # Always close what was opened above. The old code tested $newline
        # here, but a foreach loop variable reverts to its pre-loop value
        # when the loop ends, so the test never passed and the closing
        # tags were never written.
        print NEW "</small>\n</P>\n";
        print NEW "<BR>\n" if $headlines_printed;
    }
}
# Close this column and open the next one.
print NEW " </TD>\n <TD valign=\"top\">\n";
#END INQUIRER NEWS PARSER:
#!@#$%^&*()
# Third news column, first part: SacBee story links (page 8). Lines
# carrying the "nheadLineSS" or "ntopStory" marker are printed until the
# line budget ($Number_of_headlines * 5, counted down to below zero) runs
# out.
print NEW "\n\n<!-------- SACBEE NEWS SECTION -------------->\n";
&html_border();
print NEW "<A STYLE=\"text-decoration:none\" href=\"http://sacbee.com/\">
<FONT size=\"5\" color=\"orange\"><b>SacBee</b></FONT></A>";
&end_border();
print NEW "<BR>\n<small>\n";
$data_flag = $Number_of_headlines * 5;
$errorOccured = 0;
foreach $line (@{$all_data[8]}) {
    &html_error($line);

    # Budget used up, or not a story line: nothing to print.
    next unless $data_flag>=0;
    next unless $line =~ /nheadLineSS/ || $line =~ /ntopStory/;

    $line =~ s/\<br \/\>/\<br\>/g;
    # Site-relative link -> absolute link (first href on the line only).
    $line =~ s/href\=\"/href\=\"http\:\/\/www\.sacbee\.com/;
    $line = &shorten_line($line);
    print NEW "$line\n";
    &print_error( "$line\n");
    $data_flag--;
}
print NEW"</small>\n</P>\n";
#print NEW "\n\n<!-------- YAHOO/REUTERS NEWS SECTION -------------->\n";
##YAHOO NEWS & INFO PARSER
#&html_border();
#print NEW "<A STYLE=\"text-decoration:none\" href=\"http://my.yahoo.com/\">
#<FONT size=\"5\" color=\"orange\"><b>My Yahoo!</b></FONT></A><BR>\n<small>\n";
#&end_border();
#
#$data_flag=0; $errorOccured=0;
#foreach $line (@{$all_data[8]}) {
# &html_error($line);
#
#
# #grab and print actual links to articles
# if(($data_flag) && ($line =~ /story\.news\.yahoo\.com/)){
# $line =~ s/\<\/?li.*?\>/\n/g;
# $line =~ s/\<\/?td.*?\>//g;
# $line =~ s/\<\/?tr.*?\>//g;
# $line =~ s/\<\/?font.*?\>//g;
# $line =~ s/\<\/?ul.*?\>//g;
# $line =~ s/\/a\>/\/a\>\<br\>/g;
# $line =~ s/\<i.*?\/i\>//g;
# if(!($line =~ /\<b\>/)){
#
# #do a split here.
# @three_headlines = split(/\n/,$line);
# foreach $line (@three_headlines) {
#
# #shorten links that are too long
# $line = &shorten_line($line);
#
# print NEW "$line\n";
# &print_error( "$line\n");
#
# $data_flag--;
# }
# }
# }
# #grab and print headlines
# if (($line =~ /Top Stories from Reuters/) || ($line =~ /World News from Reuters/) ||
# ($line =~ /Business News from Reuters/) ||
# ($line=~/Politics News from Reuters/)){
# #$line =~ s/\<[^a].*?\<\/.*?\>//g;
# $line =~ s/.*\<b\>/\<b\>/g;
# $line =~ s/\<\/?td.*?\>//g;
# $line =~ s/\<\/?tr.*?\>//g;
# $line =~ s/\<\/?br.*?\>//g;
# $line =~ s/\<\/?font.*?\>//g;
# $line =~ s/\<\/?table.*?\>//g;
# $line =~ s/\<i.*?\/i\>//g;
# $line =~ s/\<\!.*?\/\-\-\>//g;
#
# #may want to remove these
# $line =~ s/from\s+Reuters//g;
# $line =~ s/\<\/?a.*?\>//g;
# print NEW "</small>\n$line\n<br>\n<small>";
# $data_flag=2;
# }
#
#}
#print NEW"</small>\n</P>\n";
##END YAHOO NEWS & INFO PARSER
# Third news column, second part: Ace's Hardware news (page 10). Each news
# item spans two source lines: a line with the <a ...> link followed by a
# line with the article title. The link line is remembered in $link_var
# and printed together with the next line.
print NEW "\n\n<!-------- ACESHARDWARE NEWS SECTION -------------->\n";
# $valid_data:  1 while inside the "Latest News" table cell.
# $second_line: 1 when the previous line was a link line.
$valid_data = 0;
$second_line=0;
############ ACESHARDWARE NEWS PARSER: ################
print NEW "<br>\n";
&html_border();
print NEW "<A STYLE=\"text-decoration:none\" href=\"$url[10]\"><FONT size=\"5\" color=\"blue\">
<b>Ace's Hardware</b></FONT></A>\n";
&end_border();
print NEW "<P>\n<small>\n";
$errorOccured=0; $link_var = "";
foreach $line (@{$all_data[10]}) {
&html_error($line);
#PRINTS ARTICLE TITLE
# Runs on the line AFTER a link line: glue the remembered link and this
# title line together and print them.
if ($second_line) {
$line =~ s/\<\/?font.*?\>//g;
$line =~ s/\<\/?b\>//g;
#remove extra whitespace
$line =~ s/\s*$//;
$line =~ s/^\s*//;
$line_new = $link_var . $line;
$line_new = shorten_line($line_new);
print NEW $line_new."\n";
&print_error( "$line_new\n");
}
# Reset every iteration; the link check below re-arms it when needed.
$second_line=0;
#Finds LINK
if(($valid_data) && ($line=~/\<a/)){
#print NEW "\n<!--$line-->\n";
# Page-internal "#anchor" links are made absolute.
$line =~ s/href\=\"\#/href=\"http\:\/\/www\.aceshardware\.com\/\#/g;
$line =~ s/\<\/?font.*?\>//g;
$line =~ s/\<\/?b\>//g;
#insert breaks after <br> tags
$line =~ s/\<br\>/\<br\>\n/g;
#remove extra whitespace
$line =~ s/\s*$//;
$line =~ s/^\s*//;
#insert title
#$line =~s /\<a\s+h/\<a title\=\"$temp_line\" h/;
$second_line=1;
#print NEW $line;
#&print_error( "$line");
$link_var = $line;
}
# Section start marker; checked after the link test, so the marker line
# itself is never treated as a link line.
if ($line =~ /Latest News\<\/b\>/){
$valid_data = 1;
}
# Any closing table cell ends the section.
if ($line =~ /\<\/td/) {
$valid_data = 0;
}
}
# Close this column and open the fixed-width image column.
print NEW "</small>\n</P>\n </TD>\n <TD valign=\"top\" width=\"180\">";
########################################
#Yahoo Top Images
# Fourth news column: top-story captions and thumbnail photos from the
# Yahoo news index (page 18). Thumbnails are scaled so that their shorter
# side is $picture_size pixels and are floated alternately left and right.
# print NEW " <Table>\n <TR><TD valign=\"top\" width=\"180\">\n";
print NEW "\n <!-------- News Images from Yahoo-------------->\n";
&html_border();
# Trailing space keeps the STYLE and href attributes apart.
print NEW "<A STYLE=\"text-decoration:none\" ";
print NEW "href=\"http://news.yahoo.com/news?tmpl=index2&cid=716/\">";
print NEW "<FONT size=\"5\" color=\"red\">
<b>Yahoo News</b></FONT></A><BR>\n";
&end_border();
print NEW " <P>\n";
$valid_data=0; $alternate=0;
$errorOccured=0; $picture_size = 35;
foreach $line (@{$all_data[18]}) {
    &html_error($line);
    # Caption lines: drop everything up to the end of the first tag.
    if($line =~ /class\=topstory/){
        $line =~ s/.*?\>//;
        print NEW " <small>$line</small></a><br clear=all>\n";
    }
    if(($line =~ /alt\=Photo/) or ($line =~ /alt\=Slideshow/)){
        #only one leading space
        $line =~ s/\s+/ /g;

        #trim size, keeping the original aspect ratio
        # Capture the dimensions with explicit matches. Reading $1 after a
        # substitution that may not have matched picked up stale values
        # and could die with a division by zero.
        ($width)  = $line =~ /width\=(\d{2,3})/;
        ($height) = $line =~ /height\=(\d{2,3})/;
        if(defined($width) && defined($height) && $width > 0 && $height > 0){
            $ratio = $width / $height;
            if($width > $height){
                $height = $picture_size;
                $width = $picture_size * $ratio;
            }else{
                $width = $picture_size;
                $height = $picture_size / $ratio;
            }
            # HTML size attributes are whole pixels.
            $width  = int($width + 0.5);
            $height = int($height + 0.5);
            $line =~ s/height\=\d{2,3}/height\=$height/;
            $line =~ s/width\=\d{2,3}/width\=$width/;
        }
        # (When either dimension is missing the tag's size is left alone.)

        #remove </a> tags
        $line =~ s/\<\/a\>//;
        #edit <a> tags for no underline, <img> tags no border
        # $line =~ s/\<a/\<a STYLE\=\"text\-decoration\:none\"/;
        $line =~ s/\<img/\<img border\=\"0\"/;
        #remove comments
        $line =~ s/\<\!\-\-.*?\-\-\>//g;
        #align alternating
        $line =~ s/align\=left//;
        $line =~ s/align\=right//;
        if($alternate==0){
            $line =~ s/\<img/\<img align\=\"left\"/;
            $alternate = 1;
        }else{
            $line =~ s/\<img/\<img align\=\"right\"/;
            $alternate = 0;
        }
        print NEW " $line\n";
        $valid_data = 1;
    }
}
#border for news section
&end_border();
#end news section
print NEW " </TD>\n </TR>\n</TABLE>\n<BR>\n";
#start 4th Column
print NEW " </TD>\n <TD rowspan=\"9\" valign=\"top\">\n<!--START Righthand COLUMN 4-->\n";
####### SPECIAL RIGHT HAND COLUMN DATA
# Copies the contents of rightbar.txt (user supplied HTML, looked up in the
# current directory) verbatim into the right-hand column. A missing file is
# not an error: an HTML comment is written instead.
if (open(RB, '<', "rightbar.txt")) {
    while (<RB>){
        print NEW $_;
    }
    # Close only a handle that was actually opened.
    close(RB);
}
else{
    print NEW "\n\t<!-- CANNOT FIND rightbar.txt with HTML DATA-->\n";
}
print NEW "\t<!-- END OF RIGHTHAND COLUMN-->\n";
####### END SPECIAL RIGHT HAND COLUMN DATA
#close outer table for rightbar
print NEW "\t </TD></TR></TABLE>\n\n";
#END OF NEWS TABLE, START COMICS TABLE
print NEW "\n\n\n<!-------- END NEWS : START COMICS -------------->\n\n\n";
print NEW "<TABLE border=\"0\">\n <TR>\n <TD colspan=\"8\" valign=\"top\">\n";
# Comics strip area. With "comicsoff" only a notice is written; otherwise
# the image tag of today's strip is extracted from each comic's page
# (pages 3-6), made absolute where needed and wrapped in a link to the
# comic's site.
if (!($comics_on)) {
    print NEW "\n\n\n<!--------- COMICS ARE OFF ------------------->\n\n\n";
    print NEW "<h3>Comics are off</h3>\n";
}
else{
    print NEW "\n <!-------- COMICS:Dilbert SECTION -------------->\n";
    $comic = "";
    #DILBERT URL PARSER:
    print NEW " <hr><FONT size=\"5\" color=\"blue\">Comics</FONT>\n <P>\n";
    $errorOccured=0;
    foreach $line (@{$all_data[3]}) {
        &html_error($line);
        if($line =~ /Today\'s Dilbert Comic/){
            $line =~ s/\<\/?TD.*?\>//g;
            $line =~ s/\<\/?BR.*?\>//g;
            $line =~ s/\<\/?a.*?\>//gi;
            $line =~ s/\<\!\-\-.*?\-\-\>//g;
            #<IMG SRC="/images/ffffff_dot.gif"
            $line =~ s/\<IMG SRC\=\"\/images\/fffff.*?\>//g;
            #$line =~ s/\<IMG SRC=\".*?mystery_artist.*?\>//;
            #<IMG SRC="/comics/dilbert/
            # Reduce the line to the path below /comics/dilbert, ending in
            # gif" - the closing quote is reused by the print below.
            $line =~ s/\<IMG\s*SRC\=\"\/comics\/dilbert//g;
            $line =~ s/gif\".*/gif\"/;
            $line =~ s/^\s+//g;
            $comic = $line;
            print NEW " <A STYLE=\"text-decoration:none\" href=\"http://www.dilbert.com\">";
            print NEW "<IMG border=\"0\" SRC=\"http://www.dilbert.com/comics/dilbert$comic></A>\n";
        }
    }
    print NEW " </P>\n";
    #END DILBERT URL PARSER:
    ########################################
    #PENNY ARCADE URL PARSER:
    print NEW "\n <!-------- COMICS:Penny Arcade SECTION -------------->\n";
    $comic = "";
    print NEW " <P>\n";
    $errorOccured=0;
    foreach $line (@{$all_data[4]}) {
        &html_error($line);
        # Strip images live under images/<year>; match any 20xx year
        # instead of only 200x (the old pattern stopped working in 2010).
        if($line =~ /images\/20\d\d/){
            $line =~ s/\<\/?td.*?\>//g;
            $line =~ s/ALT\=\"\"//;
            # Make the image source absolute.
            $line =~ s/c\=\"/c\=\"http\:\/\/www\.penny\-arcade\.com\//;
            $line =~ s/\s+/ /g;
            $line =~ s/\<img/\<img border\=\"0\"/;
            print NEW " <A STYLE=\"text-decoration:none\" href=\"http://www.penny-arcade.com\">";
            print NEW $line;
            print NEW "</A>\n";
        }
    }
    print NEW " </P>\n";
    #END PENNY ARCADE URL PARSER:
    ########################################
    #PVP SECTION
    print NEW "\n <!-------- COMICS:PVP Online SECTION -------------->\n";
    print NEW " <P>\n";
    $errorOccured=0;
    foreach $line (@{$all_data[5]}) {
        &html_error($line);
        if($line =~ /IMG.*?archive\//){
            # Make the image source absolute.
            $line =~ s/C\=\"/C\=\"http\:\/\/www\.pvponline\.com\//;
            $line =~ s/\<\/?center.*?\>//g;
            $line =~ s/\<\/?CENTER.*?\>//g;
            $line =~ s/\<\/?br.*?\>//g;
            $line =~ s/\<\/?td.*?\>//g;
            #remove extra <img> tag
            $line =~ s/\<img.*?images.*?\>//;
            $line =~ s/\<IMG/\<IMG align\=\"left\" border\=\"0\"/;
            #fix: remove align="left"
            # Net effect of this and the previous line: border="0" is
            # added and every align="left" on the line is dropped.
            $line =~ s/align\=\"left\"//g;
            print NEW " <A STYLE=\"text-decoration:none\" href=\"http://www.pvponline.com/\">";
            print NEW $line;
            print NEW "</A>\n";
        }
    }
    print NEW " </P>\n";
    #END PVP URL PARSER
    ########################################
    #Diesel Sweeties
    print NEW "\n <!-------- COMICS:Diesel Sweeties SECTION -------------->\n";
    print NEW " <P>\n";
    $errorOccured=0; $linecount=0;
    foreach $line (@{$all_data[6]}) {
        &html_error($line);
        $linecount++;
        # The newest strip is the first <img> after the "***newest" marker.
        if($line =~ /\*\*\*newest/){
            $line =~ s/.*?\*\*\*newest//;
            $line =~ s/.*?\<img/\<img/i;
            $line =~ s/\>.*/\>/;
            $line =~ s/^\s*//;
            # $line =~ s/\<img/\<img align\=\"right\"/i;
            print NEW " <A STYLE=\"text-decoration:none\" href=\"http://www.dieselsweeties.com/\">";
            print NEW $line ."\n";
            print NEW " </A>\n";
        }
    }
    print NEW " </P>\n\n\n";
}#comicsoff if statement.
########################################
#Reuters Top Images
# Writes the Reuters photo gallery page (page 19) below the comics.
# NOTE(review): the class="photoLink" filter is commented out, so every
# fetched line is currently written to the page followed by <br>. This
# looks like work in progress - confirm before re-enabling the filter.
print NEW "\n <!-------- News Images from Reuters-------------->\n";
&html_border();
# Trailing space keeps the STYLE and href attributes apart.
print NEW "<A STYLE=\"text-decoration:none\" ";
print NEW "href=\"http://www.reuters.com/newsPhotoGallery.jhtml?type=topNews\">";
print NEW "<FONT size=\"5\" color=\"orange\"><b>Reuters News</b></FONT></A><BR>\n";
&end_border();
print NEW " <P>\n";
$valid_data=0;
$errorOccured=0;
foreach $line (@{$all_data[19]}) {
    &html_error($line);
    #if($line =~ /class\=\"photoLink\"/){
    print NEW " $line<br>\n";
    #}
}
print NEW " </P>\n </TD>\n </TR></TABLE>\n";
&print_error( "\n\nFinished. Closing $Output_file with the results.\n");
print NEW " </TD>\n </TR>\n</Table>\n";
########################################
#QOTD SECTION
# Quote of the day (page 13) inside a hidden, absolutely positioned <div>
# with id "qotd"; the ToggleQOTD() script in the page head shows and hides
# it. The quote is the line following the one that contains
# "randomly selected quote".
#create row for qotd
print NEW "\n\n<!-------- Quote of the Day SECTION -------------->\n";
print NEW "<div id=\"qotd\" background=\"#555555\" style=\"visibility:hidden; position:absolute; left:270;\">\n";
# Grey table around a light one draws the border of the box.
print NEW "\t<TABLE border=\"0\" width=\"356\"><tr><td bgcolor=\"#999999\">\n";
print NEW "\t<TABLE border=\"0\" width=\"350\"><tr><td bgcolor=\"#eeeeee\" align=\"center\">\n";
print NEW "\t <font size=\"3\"><b>Quote of the Day</b></font><br>\n";
#QOTD
$data_flag=0;
foreach $line (@{$all_data[13]}) {
    # Fetch-failure placeholder is shown inside the box.
    if($line =~ /\(Internal Server Error\)/ ){
        print NEW "\t <small> $line </small>\n";
    }
    if ($data_flag==1){
        # Drop the first link on the quote line.
        $line =~ s/\<A.*?\<\/A\>//;
        print NEW "\t <small>\n\t <a href=\"$url[13]\"";
        print NEW " style=\"text-decoration:none\"><font color=\"black\">";
        print NEW $line;
        # Close in reverse order of opening: <a><font> ... </font></a>.
        print NEW "</font></a>\n\t </small>\n";
        $data_flag=0;
    }
    # Checked last so that the *next* line is taken as the quote.
    if($line=~/randomly selected quote/){
        $data_flag=1;
    }
}
&end_border();
print NEW "</div>\n";
#END QOTD SECTION
########################################
#Upcoming Events SECTION
# Upcoming Events: opens the hidden, absolutely positioned "UpcomingEvents"
# <div>, a border (html_border) and the three-column events table, then fills
# the first column from the Mondavi Center page held in slot 14 of @all_data.
print NEW "\n\n<!-------- EVENTS SECTION -------------->\n\n";
print NEW "<div id=\"UpcomingEvents\" background=\"white\"";
print NEW " style=\"visibility:hidden; position:absolute; top:100; left:30;\">\n";
&html_border();
#events table
print NEW "<table><tr>\n";
print NEW "\n\n<!-------- Mondavi Arts Center -------------->\n";
print NEW "<td align=\"left\" valign=\"top\" width=\"30%\">\n";
#MONDAVI ARTS CENTER
print NEW "<small><b>Mondavi Center</b><br>\n";
$errorOccured=0;
# $data_flag: inside the featured-events listing.
# $more_info: inside a "blackcopysmall" detail block (cleared again at "</div").
$data_flag=0; $more_info=0;
foreach $line (@{$all_data[14]}) {
# $line_counter is only referenced by the commented-out debug print below.
$line_counter++;
# Echo fetch-failure text (if any) into the page.
&html_error($line);
#look for good data indicator:
#if($line=~/Begin lasso content/){
if($line=~/Featured Event\"/){
$data_flag=1;
#remove everything before our indicator.
$line =~ s/.*?\<img.*?Featured Event\"/\<img Featured Events\"/;
#print NEW "\n<!--\n$line_counter:$line\n-->\n";
#data is all one line for some reason.
# Split on the closing anchor tag, so each piece ends where a link ended
# (the "</a>" itself is consumed by split).
@lasso_data = split(/\<\/a>/,$line);
foreach $line2 (@lasso_data) {
if (($data_flag==1) || ($more_info==1)){
# State updates first: these markers end the listing ...
if (($line2=~/announcements\.gif/) || ($line2 =~ /Post-Performance/)){
$data_flag=0;
}
# ... a closing div ends a detail block ...
if ($line2=~/\<\/div/) {
$more_info=0;
}
# ... and "blackcopysmall" starts one.
if ($line2 =~ /blackcopysmall/) {
$more_info=1;
}
# Event title link: strip comments, entities and layout tags but keep the
# anchor, make its href absolute, and re-append the "</a>" split removed.
if (($data_flag==1) && ($line2 =~ /whitelink2/)){
$line2 =~ s/\<\!\-\-.*?\-\-\>//g;
$line2 =~ s/\&.*?\;//g;
$line2 =~ s/\<\/?td.*?\>//g;
$line2 =~ s/\<\/?tr.*?\>//g;
$line2 =~ s/\<\/?span.*?\>//g;
$line2 =~ s/\<\/?blockquote.*?\>//g;
$line2 =~ s/\<\/?br.*?\>//g;
$line2 =~ s/\<\/?p.*?\>//g;
$line2 =~ s/\<\/?img.*?\>//g;
$line2 =~ s/\<\/?div.*?\>//g; # div wrappers
$line2 =~ s/\<\/?table.*?\>//g;
# Only the first href in the piece is prefixed (no /g).
$line2 =~ s/\<a href\=\"/\<a href\=\"http\:\/\/www\.mondaviarts\.org\//;
$line2 =~ s/\s+/ /g;
$line2 =~ s/^\s+//;
print NEW "$line2</a><br> ";
}
# Any other piece seen while a flag is still set: printed as plain text, so
# the anchor tags are stripped as well.
elsif (($data_flag==1) || ($more_info==1)){
$line2 =~ s/\<\!\-\-.*?\-\-\>//g;
#get rid of extra <a link
$line2 =~ s/\<\/?a.*?\>//g;
$line2 =~ s/\&.*?\;//g;
$line2 =~ s/\<\/?td.*?\>//g;
$line2 =~ s/\<\/?blockquote.*?\>//g;
$line2 =~ s/\<\/?br.*?\>//g;
$line2 =~ s/\<\/?p.*?\>//g;
$line2 =~ s/\<\/?tr.*?\>//g;
$line2 =~ s/\<\/?td.*?\>//g;
$line2 =~ s/\<\/?span.*?\>//g;
$line2 =~ s/\<\/?img.*?\>//g;
$line2 =~ s/\<\/?div.*?\>//g; # div wrappers
$line2 =~ s/\<\/?table.*?\>//g;
$line2 =~ s/\s+/ /g;
# Cut the piece off at the "Post-Performance" marker.
$line2 =~ s/Post-Performance.*//;
# Skip pieces with no letter and no '<' left after stripping.
if ($line2 =~ /[a-zA-Z<]/) {
print NEW "$line2<BR>\n";
}
}
}
}
}
}
print NEW "</small>\n";
#end mondavi column
print NEW "</td>\n";
#UC Berkeley
# Second events column, built from slot 15 of @all_data. The listing starts at
# the first line containing "</tr>" at or after the "Events this week"
# heading and stops at the "#top" link; at most $limit lines are printed.
print NEW "\n\n<!-------- UC Berkeley -------------->\n";
print NEW "<td align=\"left\" valign=\"top\" width=\"30%\">\n";
print NEW "<small><b>UC Berkeley</b><br>\n";
$errorOccured=0; $limit=34;
$pre_data_flag = 0; $data_flag=0; $a_tag_start=0;
foreach $line (@{$all_data[15]}) {
    # Echo fetch-failure text (if any) into the page.
    &html_error($line);
    if ($line =~ /\#top/) {
        #get rid of everything after #top
        $line =~ s/\#top.*/\#top/;
        # The "#top" link marks the end of the listing.
        $data_flag=0;
        $pre_data_flag=0;
    }
    if ($line =~ /Events this week/) {
        $pre_data_flag=1;
        #print NEW "--$line--\n";
    }
    # The heading has been seen; the "</tr>" line itself is already output.
    if (($pre_data_flag==1) && ($line =~ /\<\/tr\>/)) {
        $data_flag=1;
    }
    if (($data_flag==1) && ($limit > 0)){
        # Strip the table layout and images, keeping text and links.
        $line =~ s/\<\/?td.*?\>//g;
        $line =~ s/\<\/?img.*?\>//g;
        $line =~ s/\<\/?tr.*?\>//g;
        $line =~ s/\<\/?table.*?\>//g;
        $line =~ s/\s+/ /g;
        $line =~ s/^\s+//;
        # One link per output row.
        $line =~ s/\<\/a\>/\<\/a\>\<br\>\n/g;
        # In-page anchors must point back at the calendar page itself.
        $line =~ s/\<a href\=\"\#/\<a href\=\"http\:\/\/www\.berkeley\.edu\/calendar\/\#/;
        print NEW "$line";
        $limit--;
    }
}
print NEW "</small>\n";
print NEW "</td>\n";
#end Berkeley column
#EPICKS
# Third events column, built from slot 16 of @all_data. Output starts at the
# line containing "Movies:" and stops at the next line containing "TABLE".
print NEW "\n\n<!-------- SF E-Picks -------------->\n";
print NEW "<td align=\"left\" valign=\"top\" width=\"30%\">\n";
print NEW "<small><b>SF Gate</b><br>\n";
$errorOccured=0;
$data_flag=0;
foreach $line (@{$all_data[16]}) {
    # Echo fetch-failure text (if any) into the page.
    &html_error($line);
    if ($line =~ /Movies:/) {
        $data_flag=1;
    }
    # Any line mentioning TABLE ends the section. (The pattern used to be
    # written with a stray backslash before the T, an unrecognized escape that
    # Perl matches as a literal "T" anyway, so this matches the same lines.)
    # NOTE(review): possibly only the closing table tag was intended - confirm.
    if ($line =~ /TABLE/) {
        $data_flag=0;
    }
    if ($data_flag==1){
        # Strip layout tags (upper-case spellings only, as the page uses them).
        $line =~ s/\<\/?TD.*?\>//g;
        $line =~ s/\<\/?TR.*?\>//g;
        $line =~ s/\<\/?FONT.*?\>//g;
        $line =~ s/\<\/?P.*?\>//g;
        # In-page anchors must point back at the e-picks page itself.
        $line =~ s/HREF\=\"\#/href\=\"http\:\/\/sfgate\.com\/eguide\/epicks\/\#/;
        $line =~ s/^\s+//;
        # Skip lines that are empty after stripping.
        if ($line =~ /\S+/) {
            print NEW "$line\n";
        }
    }
}
print NEW "</small>\n";
print NEW "</td>\n";
#EPICKS
# Close the events table, its border and the hidden events <div>.
print NEW " </tr>\n";
print NEW "</table>\n";
&end_border();
print NEW "</div>\n\n";
#END Events SECTION
print NEW " </TD></TR>\n";
########################################
#COPYRIGHT WARNING
print NEW "\n<p>\n <center>\n <small>\n";
print NEW " <!----COPYRIGHT WARNING---->\n <b>COPYRIGHT WARNING</b><br>\n";
print NEW " Webpage content is owned by corresponding webpage owner,";
print NEW " see individual pages for details.\n";
print NEW " </small>\n </center>\n</p>\n\n";
print NEW "</BODY>\n</HTML>\n";
print NEW "<!--END OF FILE-->\n";
# Buffered write errors (full disk, ...) only surface when the handle is
# closed, so report a failed close instead of silently leaving a truncated
# page. warn rather than die so the log file below is still closed.
close(NEW) or warn "Could not finish writing the output page: $!\n";
if($logfiles){
    close(LOGFILE) or warn "Could not close the log file: $!\n";
}
##########################################
#HELPER FUNCTIONS : PRINT_ERROR, SHORTEN_LINE, HTML_BORDER, END_BORDER, HTML_ERROR
# print_error(MESSAGE)
# When $logfiles is true, writes MESSAGE to the LOGFILE handle and echoes it
# on the currently selected output handle. When $logfiles is false nothing is
# printed at all. MESSAGE is also left in the package variable $toPrint.
sub print_error{
    ($toPrint) = @_;
    return unless $logfiles;
    print LOGFILE $toPrint;
    print $toPrint;
}
# shorten_line(HTML)
# Truncates over-long link text so it fits a column.
#
# HTML is a fragment such as '<a href="...">headline</a>'. The first run of
# text that directly follows a '>' and is at least the limit long is cut to
# (limit - 3) characters plus "...", and the untruncated text is added as a
# title="..." attribute on the first <a tag so it is still available as a
# tooltip. The limit is the global $column_width, plus 3 unless the fragment
# contains seven or more capital letters on one line (capitals are wider).
#
# Returns the (possibly modified) fragment; fragments with no over-long text
# run are returned unchanged. All working values are lexical, so the caller's
# globals (notably $line2, used as a loop variable by the main script) are
# left alone.
sub shorten_line{
    my ($long_string) = @_;

    #look for too many uppercase letters
    my $limit = $column_width;
    $limit += 3 unless $long_string =~ /(?:[A-Z].*?){6}[A-Z]/;

    if ($long_string =~ /\>([^<]{$limit,})/ ) {
        my $full_text  = $1;
        my $short_text = substr($full_text, 0, $limit - 3) . "...";

        # Escape double quotes so neither the title attribute nor the body
        # text can terminate the attribute value early.
        for my $text ($full_text, $short_text) {
            $text =~ s/\"/&quot;/g;
        }

        # Replace exactly the run that was measured above. This is done
        # BEFORE the title is inserted so a '>' inside the title text can
        # never be mistaken for the end of a tag.
        $long_string =~ s/\>[^<]{$limit,}/\>$short_text/;
        $long_string =~ s/\<a/\<a title\=\"$full_text\"/;
    }
    return $long_string;
}
# html_border()
# Writes the opening half of the two-table "border" to the NEW handle: an
# outer table with a #999999 cell wrapping an inner table with a centred
# #eeeeee cell, bracketed by BORDER-2 START marker comments. end_border()
# writes the matching closing tags.
sub html_border{
    my @pieces = (
        qq{\n\t<!--BORDER-2 START-->\n},
        qq{\t<TABLE border="0" width="100%"><tr><td bgcolor="#999999">\n},
        qq{\t<TABLE border="0" width="100%"><tr><td bgcolor="#eeeeee" align="center">\n},
        qq{\t<!--BORDER-2 START-->\n\n},
    );
    # One print per piece, in order.
    print NEW $_ for @pieces;
}
# end_border()
# Writes the closing half of the two-table "border" to the NEW handle: two
# cell/row/table closers (inner table first, then outer) bracketed by
# BORDER-2 END marker comments.
sub end_border{
    my $close_table = qq{\t</td></tr></table>\n};
    my @pieces = (
        qq{\n\t<!--BORDER-2 END-->\n},
        $close_table,
        $close_table,
        qq{\t<!--BORDER-2 END-->\n\n},
    );
    # One print per piece, in order.
    print NEW $_ for @pieces;
}
# html_error(LINE)
# Copies fetch-failure text into the generated page. LINE is written to the
# NEW handle when it contains "(Internal Server Error)", or when the global
# counter $errorOccured is 1 or 2 - i.e. the failure line plus the next two
# lines are echoed. Each echoed line increments $errorOccured; callers reset
# it to 0 before scanning a new page. LINE is also left in the package
# variable $error_line.
sub html_error{
    ($error_line) = @_;

    #handle failed html lookups
    my $is_failure   = $error_line =~ /\(Internal Server Error\)/;
    my $in_follow_up = $errorOccured > 0 && $errorOccured < 3;
    return unless $is_failure || $in_follow_up;

    print NEW $error_line;
    $errorOccured++;
}
#EOF#