From 4318a4b6399f9e334b47a9a2735ec331c9727bb9 Mon Sep 17 00:00:00 2001 From: Will Rossiter Date: Sat, 16 Feb 2013 18:09:03 +1300 Subject: [PATCH] Pass request_options to the callback (Fixes #13) Uses the third argument of the callback to pass back the request options for the given URL. This lets callers tell which request a result belongs to when handling parallel requests. --- README.md | 6 +++++- examples/advanced.js | 2 +- examples/parallel.js | 2 +- lib/scraper.js | 6 +++--- 4 files changed, 10 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 7f0daaa..abdfad1 100644 --- a/README.md +++ b/README.md @@ -40,7 +40,9 @@ First argument is an object containing settings for the "request" instance used }); } ); + ### Parallel + First argument is an array containing either strings or objects, second is a callback which exposes a jQuery object with your scraped site as "body" and third is an object from the request containing info about the url. **You can also add rate limiting to the fetcher by adding an options object as the third argument containing 'reqPerSec': float.** @@ -58,9 +60,11 @@ First argument is an array containing either strings or objects, second is a cal } , 'http://search.twitter.com/search?q=html5' ] - , function(err, $) { + , function(err, $, requestOptions) { if (err) {throw err;} + console.log(requestOptions.uri); + $('.msg').each(function() { console.log($(this).text().trim()+'\n'); }); diff --git a/examples/advanced.js b/examples/advanced.js index 96b7b53..34c7738 100644 --- a/examples/advanced.js +++ b/examples/advanced.js @@ -5,7 +5,7 @@ scraper({ , 'headers': { 'User-Agent': 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)' }} - , function(err, $) { + , function(err, $, requestOptions) { if (err) {throw err;} $('.msg').each(function() { diff --git a/examples/parallel.js b/examples/parallel.js index 4e3e9f3..009e357 100644 --- a/examples/parallel.js +++ b/examples/parallel.js @@ -11,7 +11,7 @@ scraper([ } , 'http://search.twitter.com/search?q=html5' ] - , 
function(err, $) { + , function(err, $, requestOptions) { if (err) {throw err;} $('.msg').each(function() { diff --git a/lib/scraper.js b/lib/scraper.js index 6ca44ab..7723021 100644 --- a/lib/scraper.js +++ b/lib/scraper.js @@ -43,7 +43,7 @@ module.exports = function scrape(requestOptions, callback, fetchOptions) { } if (!requestOptions['uri']) { - callback(new Error('You must supply an uri.'), null, null); + callback(new Error('You must supply an uri.'), null, requestOptions); } request(requestOptions, function (err, response, body) { @@ -57,10 +57,10 @@ module.exports = function scrape(requestOptions, callback, fetchOptions) { jsdom.jQueryify(window, __dirname+'/../deps/jquery-1.6.1.min.js', function(win, $) { $('head').append($(body).find('head').html()); $('body').append($(body).find('body').html()); - callback(null, $); + callback(null, $, requestOptions); }); } else { - callback(new Error('Request to '+requestOptions['uri']+' ended with status code: '+(typeof response !== 'undefined' ? response.statusCode : 'unknown')), null, null); + callback(new Error('Request to '+requestOptions['uri']+' ended with status code: '+(typeof response !== 'undefined' ? response.statusCode : 'unknown')), null, requestOptions); } }); })