Re: Threading/Blocking and avoiding the Beachball,
by clanmills
Todd
Can I validate the specification here with you, please? I've written a little script:
#!/bin/bash
# Usage: readem.sh FILE
#
# Reads page names from FILE (one per line), fetches
# http://clanmills.com/<name> with curl, and prints the line count of each
# response followed by the number of names processed.

if [ $# -ne 1 ]; then
    echo "usage: $0 file" >&2
    exit 1
fi

count=0
# -r keeps backslashes in a name literal instead of treating them as escapes.
while read -r line
do
    count=$((count + 1))
    url="http://clanmills.com/${line}"
    lines=$(curl --silent "$url" | wc -l)
    # $((lines)) drops any padding that wc puts in front of the number.
    echo "url = $url lines = $((lines))"
done < "$1"
echo "processed : $count files"
which reads the file:
BoydWedding.html
NicholasBirthday.html
arizona.html
dennis.html
eventsevents.html
gps.html
gpsfoo.html
popup.html
rollovermap.html
rollovermap1.html
statesonly.html
utah.html
and produces the output:
543 /home/rmills/temp$ readem.sh readem.txt
url = http://clanmills.com/BoydWedding.html lines = 10
url = http://clanmills.com/NicholasBirthday.html lines = 10
url = http://clanmills.com/arizona.html lines = 14
url = http://clanmills.com/dennis.html lines = 28
url = http://clanmills.com/eventsevents.html lines = 509
url = http://clanmills.com/gps.html lines = 83
url = http://clanmills.com/gpsfoo.html lines = 280
url = http://clanmills.com/popup.html lines = 32
url = http://clanmills.com/rollovermap.html lines = 104
url = http://clanmills.com/rollovermap1.html lines = 104
url = http://clanmills.com/statesonly.html lines = 10
url = http://clanmills.com/utah.html lines = 10
processed : 12 files
So, it's reading the names of files from readem.txt and building URLs which it then gets using curl. For simplicity, all I'm doing is a simple line count on each file as proof that all the files are different.
I've written a command-line tool version of this in Obj/C++ (code below) and here's the output:
2010-03-14 20:57:28.599 readem[25593:a0f] http://clanmills.com/BoydWedding.html lines = 11
2010-03-14 20:57:28.697 readem[25593:a0f] http://clanmills.com/NicholasBirthday.html lines = 11
2010-03-14 20:57:28.794 readem[25593:a0f] http://clanmills.com/arizona.html lines = 15
2010-03-14 20:57:28.890 readem[25593:a0f] http://clanmills.com/dennis.html lines = 29
2010-03-14 20:57:29.091 readem[25593:a0f] http://clanmills.com/eventsevents.html lines = 510
2010-03-14 20:57:29.191 readem[25593:a0f] http://clanmills.com/gps.html lines = 84
2010-03-14 20:57:29.300 readem[25593:a0f] http://clanmills.com/gpsfoo.html lines = 281
2010-03-14 20:57:29.397 readem[25593:a0f] http://clanmills.com/popup.html lines = 33
2010-03-14 20:57:29.498 readem[25593:a0f] http://clanmills.com/rollovermap.html lines = 105
2010-03-14 20:57:29.601 readem[25593:a0f] http://clanmills.com/rollovermap1.html lines = 105
2010-03-14 20:57:29.697 readem[25593:a0f] http://clanmills.com/statesonly.html lines = 11
2010-03-14 20:57:29.793 readem[25593:a0f] http://clanmills.com/utah.html lines = 11
2010-03-14 20:57:29.984 readem[25593:a0f] http://clanmills.com/ lines = 212
2010-03-14 20:57:29.984 readem[25593:a0f] processed = 13 files
Let's ignore that the arithmetic isn't quite correct and that the Obj/C++ version seems to count an extra line (and has 13 files instead of 12). Let's not worry about those details, which have to do with a trailing blank line both in readem.txt and in the code pulled from the internet.
Here's the code:
#import <Foundation/Foundation.h>
// Returns the number of pieces produced by splitting s on "\n".
// A string that ends with a newline therefore reports one more than the
// number of newline characters it contains (the final piece is empty).
// A nil string reports 0, since messaging nil yields zero.
static int lineCount(NSString* s)
{
    return [[s componentsSeparatedByString:@"\n"] count] ;
}
// Command-line entry point.
//
// Reads the file named by argv[1], splits it on "\n", and for every resulting
// line fetches http://clanmills.com/<line> and logs lineCount() of the
// response. Finishes by logging how many lines were processed.
//
// Returns 0 on success, 1 on a usage error or when the input file cannot be
// read.
int main (int argc, const char * argv[])
{
    if ( argc != 2 ) {
        // Usage goes to stderr, newline-terminated, with a conventional exit code.
        fprintf(stderr, "syntax: readem file\n") ;
        return 1 ;
    }

    NSAutoreleasePool* pool = [[NSAutoreleasePool alloc] init];
    NSStringEncoding encoding ;
    NSError* error = nil ;
    NSUInteger urlCount = 0 ;

    // get the lines from the input file
    NSString* fileName = [NSString stringWithUTF8String:argv[1]];
    NSString* fileString = [NSString
        stringWithContentsOfFile : fileName
        usedEncoding : &encoding
        error : &error
    ];
    if ( !fileString ) {
        // Check the return value, not the error pointer, to detect failure.
        NSLog(@"unable to read %@: %@",fileName,error) ;
        [pool drain];
        return 1 ;
    }
    NSArray* lines = [fileString componentsSeparatedByString:@"\n"] ;

    // run over the lines, convert them to URLs and read them from clanmills.com
    // NOTE: an empty line (such as the empty final component left by a trailing
    // newline) is still processed; it resolves to the site root and is counted.
    for ( NSString* line in lines ) {
        urlCount ++ ;
        NSString* urlString = [NSString stringWithFormat:@"http://clanmills.com/%@",line];
        NSURL* url = [NSURL URLWithString:urlString];
        if ( !url ) {
            NSLog(@"%@ is not a valid URL",urlString) ;
            continue ;
        }

        error = nil ;
        NSString* s = [NSString
            stringWithContentsOfURL : url
            usedEncoding : &encoding
            error : &error
        ];
        if ( !s ) {
            NSLog(@"%@ could not be read: %@",urlString,error) ;
            continue ;
        }
        NSLog(@"%@ lines = %d",urlString,lineCount(s)) ;
    }

    // NSUInteger is wider than int on 64-bit targets; cast to match %lu.
    NSLog(@"processed = %lu files",(unsigned long)urlCount) ;
    [pool drain];
    return 0;
}
