Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,9 @@ First argument is an object containing settings for the "request" instance used
});
}
);

### Parallel

The first argument is an array containing either strings or objects. The second is a callback that receives an error (if any), a jQuery object with your scraped site as "body", and, as its third argument, the request options object containing info about the URL (for example, its `uri`).

**You can also add rate limiting to the fetcher by passing an options object containing 'reqPerSec': float as the third argument.**
Expand All @@ -58,9 +60,11 @@ First argument is an array containing either strings or objects, second is a cal
}
, 'http://search.twitter.com/search?q=html5'
]
, function(err, $) {
, function(err, $, requestOptions) {
if (err) {throw err;}

console.log(requestOptions.uri);

$('.msg').each(function() {
console.log($(this).text().trim()+'\n');
});
Expand Down
2 changes: 1 addition & 1 deletion examples/advanced.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ scraper({
, 'headers': {
'User-Agent': 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)'
}}
, function(err, $) {
, function(err, $, requestOptions) {
if (err) {throw err;}

$('.msg').each(function() {
Expand Down
2 changes: 1 addition & 1 deletion examples/parallel.js
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ scraper([
}
, 'http://search.twitter.com/search?q=html5'
]
, function(err, $) {
, function(err, $, requestOptions) {
if (err) {throw err;}

$('.msg').each(function() {
Expand Down
6 changes: 3 additions & 3 deletions lib/scraper.js
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ module.exports = function scrape(requestOptions, callback, fetchOptions) {
}

if (!requestOptions['uri']) {
callback(new Error('You must supply an uri.'), null, null);
callback(new Error('You must supply an uri.'), null, requestOptions);
}

request(requestOptions, function (err, response, body) {
Expand All @@ -57,10 +57,10 @@ module.exports = function scrape(requestOptions, callback, fetchOptions) {
jsdom.jQueryify(window, __dirname+'/../deps/jquery-1.6.1.min.js', function(win, $) {
$('head').append($(body).find('head').html());
$('body').append($(body).find('body').html());
callback(null, $);
callback(null, $, requestOptions);
});
} else {
callback(new Error('Request to '+requestOptions['uri']+' ended with status code: '+(typeof response !== 'undefined' ? response.statusCode : 'unknown')), null, null);
callback(new Error('Request to '+requestOptions['uri']+' ended with status code: '+(typeof response !== 'undefined' ? response.statusCode : 'unknown')), null, requestOptions);
}
});
})
Expand Down