From e26b8cb352c83e0e4d2be210e332001342867b0b Mon Sep 17 00:00:00 2001 From: eriktiemens Date: Tue, 30 Jul 2013 16:21:02 +0200 Subject: [PATCH 01/17] Added message to error response --- lib/shrimp/middleware.rb | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/lib/shrimp/middleware.rb b/lib/shrimp/middleware.rb index 96e8050..fbaf80b 100644 --- a/lib/shrimp/middleware.rb +++ b/lib/shrimp/middleware.rb @@ -12,12 +12,12 @@ def initialize(app, options = { }, conditions = { }) def call(env) @request = Rack::Request.new(env) - if render_as_pdf? #&& headers['Content-Type'] =~ /text\/html|application\/xhtml\+xml/ + if render_as_pdf? if already_rendered? && (up_to_date?(@options[:cache_ttl]) || @options[:cache_ttl] == 0) if File.size(render_to) == 0 File.delete(render_to) remove_rendering_flag - return error_response + return error_response("PDF file invalid") end return ready_response if env['HTTP_X_REQUESTED_WITH'] file = File.open(render_to, "rb") @@ -34,7 +34,7 @@ def call(env) if rendering_in_progress? if rendering_timed_out? remove_rendering_flag - error_response + error_response("Rendering timeout") else reload_response(@options[:polling_interval]) end @@ -156,13 +156,13 @@ def ready_response [200, headers, [body]] end - def error_response + def error_response(message) body = <<-HTML.gsub(/[ \n]+/, ' ').strip -

Sorry request timed out...

+

Sorry request timed out... #{message}

HTML From f6686e8ae06cc6a7a3a58abd6ed69ed560c99e41 Mon Sep 17 00:00:00 2001 From: Martin Gotink Date: Fri, 21 Mar 2014 11:26:51 +0100 Subject: [PATCH 02/17] Require rack >= 1.4.1 in stead of = 1.4.1 --- shrimp.gemspec | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/shrimp.gemspec b/shrimp.gemspec index 755850d..59995fc 100644 --- a/shrimp.gemspec +++ b/shrimp.gemspec @@ -23,5 +23,5 @@ Gem::Specification.new do |gem| gem.add_development_dependency(%q, [">=0.9.2"]) gem.add_development_dependency(%q, [">= 2.2.0"]) gem.add_development_dependency(%q, [">= 0.5.6"]) - gem.add_development_dependency(%q, ["= 1.4.1"]) + gem.add_development_dependency(%q, [">= 1.4.1"]) end From 37b5ecbd3fdac6d8c54a41c4b59060778abe26a8 Mon Sep 17 00:00:00 2001 From: Martin Gotink Date: Fri, 21 Mar 2014 11:27:17 +0100 Subject: [PATCH 03/17] Fix typo in comment --- shrimp.gemspec | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/shrimp.gemspec b/shrimp.gemspec index 59995fc..05d2549 100644 --- a/shrimp.gemspec +++ b/shrimp.gemspec @@ -19,7 +19,7 @@ Gem::Specification.new do |gem| gem.requirements << 'phantomjs, v1.6 or greater' gem.add_runtime_dependency "json" - # Developmnet Dependencies + # Development Dependencies gem.add_development_dependency(%q, [">=0.9.2"]) gem.add_development_dependency(%q, [">= 2.2.0"]) gem.add_development_dependency(%q, [">= 0.5.6"]) From 5cde07a6a186257acb0de3d25a5420e248e3e861 Mon Sep 17 00:00:00 2001 From: Martin Gotink Date: Fri, 21 Mar 2014 12:38:01 +0100 Subject: [PATCH 04/17] Refactor middleware spec --- spec/shrimp/middleware_spec.rb | 110 +++++++++++++++++++-------------- 1 file changed, 63 insertions(+), 47 deletions(-) diff --git a/spec/shrimp/middleware_spec.rb b/spec/shrimp/middleware_spec.rb index 48e720f..ae5cac6 100644 --- a/spec/shrimp/middleware_spec.rb +++ b/spec/shrimp/middleware_spec.rb @@ -24,11 +24,7 @@ def mock_app(options = { }, conditions = { }) describe Shrimp::Middleware do before { mock_app(options) } - context "matching pdf" do - it "should render as pdf" do - get '/test.pdf' - @middleware.send(:'render_as_pdf?').should be true - end + describe "rendering the PDF" do it "should return 503 the first time" do get '/test.pdf' last_response.status.should eq 503 @@ -42,11 +38,6 @@ def mock_app(options = { }, conditions = { }) last_response.header["Retry-After"].should eq "1" end - it "should set render to to outpath" do - get '/test.pdf' - @middleware.send(:render_to).should match (Regexp.new("^#{options[:out_path]}")) - end - it "should return 504 on timeout" do get '/test.pdf' sleep 1 @@ -72,52 +63,77 @@ def mock_app(options = { }, conditions = { }) last_response.status.should eq 200 last_response.body.should eq "Hello World" end - - end - context "not matching pdf" do - it "should skip pdf rendering" do - get 'http://www.example.org/test' - last_response.body.should include "Hello world!" - @middleware.send(:'render_as_pdf?').should be false - end - end -end - -describe "Conditions" do - context "only" do - before { mock_app(options, :only => [%r[^/invoice], %r[^/public]]) } - it "render pdf for set only option" do - get '/invoice/test.pdf' - @middleware.send(:'render_as_pdf?').should be true - end - it "render pdf for set only option" do - get '/public/test.pdf' - @middleware.send(:'render_as_pdf?').should be true - end - - it "not render pdf for any other path" do - get '/secret/test.pdf' - @middleware.send(:'render_as_pdf?').should be false + describe "#render_to" do + it "should be set to the outpath" do + get '/test.pdf' + @middleware.send(:render_to).should match (Regexp.new("^#{options[:out_path]}")) end end - context "except" do - before { mock_app(options, :except => %w(/secret)) } - it "render pdf for set only option" do - get '/invoice/test.pdf' - @middleware.send(:'render_as_pdf?').should be true + describe "#render_as_pdf?" do + context "without conditions" do + context "the URL contains .pdf" do + it "should render as pdf" do + get '/test.pdf' + @middleware.send(:'render_as_pdf?').should be true + end + end + + context "the URL doesn't contain .pdf" do + it "should skip pdf rendering" do + get 'http://www.example.org/test' + last_response.body.should include "Hello world!" + @middleware.send(:'render_as_pdf?').should be false + end + end end - it "render pdf for set only option" do - get '/public/test.pdf' - @middleware.send(:'render_as_pdf?').should be true + context "with only condition" do + before { mock_app(options, :only => [%r[^/invoice], %r[^/public]]) } + + context "when the url is in the only condition" do + it "render pdf for set only option" do + get '/invoice/test.pdf' + @middleware.send(:'render_as_pdf?').should be true + end + + it "render pdf for set only option" do + get '/public/test.pdf' + @middleware.send(:'render_as_pdf?').should be true + end + end + + context "when the url isn't in the only condition" do + it "not render pdf for any other path" do + get '/secret/test.pdf' + @middleware.send(:'render_as_pdf?').should be false + end + end end - it "not render pdf for any other path" do - get '/secret/test.pdf' - @middleware.send(:'render_as_pdf?').should be false + context "with except condition" do + before { mock_app(options, :except => %w(/secret)) } + + context "when the url isn't in the except condition" do + it "render pdf for set only option" do + get '/invoice/test.pdf' + @middleware.send(:'render_as_pdf?').should be true + end + + it "render pdf for set only option" do + get '/public/test.pdf' + @middleware.send(:'render_as_pdf?').should be true + end + end + + context "when the url is in the except condition" do + it "not render pdf for any other path" do + get '/secret/test.pdf' + @middleware.send(:'render_as_pdf?').should be false + end + end end end end From 668cfef662730fbbfd2e49f4f7f8ae5dbc06ac4c Mon Sep 17 00:00:00 2001 From: Martin Gotink Date: Fri, 21 Mar 2014 14:11:48 +0100 Subject: [PATCH 05/17] Correctly handle parameters in a URL If a URL with parameters (like http://example.org/test.pdf?x=42) is requested generating the PDF will fail because the .pdf extension is not stripped from the URL. The regular expression for replacing the .pdf extension now correctly handles this case. --- lib/shrimp/middleware.rb | 6 +++++- spec/shrimp/middleware_spec.rb | 23 +++++++++++++++++++++++ 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/lib/shrimp/middleware.rb b/lib/shrimp/middleware.rb index fbaf80b..c386d66 100644 --- a/lib/shrimp/middleware.rb +++ b/lib/shrimp/middleware.rb @@ -54,7 +54,11 @@ def call(env) # Private: start phantom rendering in a separate process def fire_phantom - Process::detach fork { Phantom.new(@request.url.sub(%r{\.pdf$}, ''), @options, @request.cookies).to_pdf(render_to) } + Process::detach fork { Phantom.new(phantom_request_url, @options, @request.cookies).to_pdf(render_to) } + end + + def phantom_request_url + @request.url.sub(%r{\.pdf(\?.*)?$}, '\1') end def render_to diff --git a/spec/shrimp/middleware_spec.rb b/spec/shrimp/middleware_spec.rb index ae5cac6..f13978a 100644 --- a/spec/shrimp/middleware_spec.rb +++ b/spec/shrimp/middleware_spec.rb @@ -81,6 +81,13 @@ def mock_app(options = { }, conditions = { }) end end + context "the URL contains .pdf and parameters" do + it "should render as pdf" do + get '/test.pdf?x=42' + @middleware.send(:'render_as_pdf?').should be true + end + end + context "the URL doesn't contain .pdf" do it "should skip pdf rendering" do get 'http://www.example.org/test' @@ -136,4 +143,20 @@ def mock_app(options = { }, conditions = { }) end end end + + describe "#phantom_request_url" do + context "when the URL has no parameters" do + it "removes the .pdf extension" do + get 'http://example.org/test.pdf' + @middleware.send(:phantom_request_url).should eq 'http://example.org/test' + end + end + + context "when the URL has parameters" do + it "it preserves the parameters but removes the .pdf extension" do + get 'http://example.org/test.pdf?x=42' + @middleware.send(:phantom_request_url).should eq 'http://example.org/test?x=42' + end + end + end end From 8964d21b95494aacfaee460e362110841863cb22 Mon Sep 17 00:00:00 2001 From: Martin Gotink Date: Fri, 21 Mar 2014 15:06:46 +0100 Subject: [PATCH 06/17] Render to different output files when URL parameters differ Only the path of the URL is used to create a unique output file name, but different URL parameters might result in different contents so a different file name should be used. The whole URL (including the parameters) will now be used to generate a unique file name. --- lib/shrimp/middleware.rb | 2 +- spec/shrimp/middleware_spec.rb | 18 ++++++++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/lib/shrimp/middleware.rb b/lib/shrimp/middleware.rb index c386d66..680af92 100644 --- a/lib/shrimp/middleware.rb +++ b/lib/shrimp/middleware.rb @@ -62,7 +62,7 @@ def phantom_request_url end def render_to - file_name = Digest::MD5.hexdigest(@request.path) + ".pdf" + file_name = Digest::MD5.hexdigest(@request.url) + ".pdf" file_path = @options[:out_path] "#{file_path}/#{file_name}" end diff --git a/spec/shrimp/middleware_spec.rb b/spec/shrimp/middleware_spec.rb index f13978a..24625d7 100644 --- a/spec/shrimp/middleware_spec.rb +++ b/spec/shrimp/middleware_spec.rb @@ -70,6 +70,24 @@ def mock_app(options = { }, conditions = { }) get '/test.pdf' @middleware.send(:render_to).should match (Regexp.new("^#{options[:out_path]}")) end + + it "should be the same path if the URLs are the same" do + get '/test.pdf' + first_render_to = @middleware.send(:render_to) + get '/test.pdf' + second_render_to = @middleware.send(:render_to) + + first_render_to.should eq second_render_to + end + + it "should be a different path when the URL parameters differ" do + get '/test.pdf' + first_render_to = @middleware.send(:render_to) + get '/test.pdf?x=42' + second_render_to = @middleware.send(:render_to) + + first_render_to.should_not eq second_render_to + end end describe "#render_as_pdf?" do From f4ec9a840c0a3efd3aecc1e80124115815bf9774 Mon Sep 17 00:00:00 2001 From: Martin Gotink Date: Fri, 21 Mar 2014 15:41:04 +0100 Subject: [PATCH 07/17] Refactor render_as_pdf? method --- lib/shrimp/middleware.rb | 55 +++++++++++++++++++++++++--------------- 1 file changed, 34 insertions(+), 21 deletions(-) diff --git a/lib/shrimp/middleware.rb b/lib/shrimp/middleware.rb index 680af92..6782b36 100644 --- a/lib/shrimp/middleware.rb +++ b/lib/shrimp/middleware.rb @@ -96,32 +96,45 @@ def rendering_in_progress? end def render_as_pdf? - request_path_is_pdf = !!@request.path.match(%r{\.pdf$}) + return false unless request_path_is_pdf? - if request_path_is_pdf && @conditions[:only] - rules = [@conditions[:only]].flatten - rules.any? do |pattern| - if pattern.is_a?(Regexp) - @request.path =~ pattern - else - @request.path[0, pattern.length] == pattern - end - end - elsif request_path_is_pdf && @conditions[:except] - rules = [@conditions[:except]].flatten - rules.map do |pattern| - if pattern.is_a?(Regexp) - return false if @request.path =~ pattern - else - return false if @request.path[0, pattern.length] == pattern - end - end - return true + if @conditions[:only] + path_is_in_only_conditions? + elsif @conditions[:except] + path_is_not_in_except_conditions? else - request_path_is_pdf + true end end + def path_is_in_only_conditions? + rules = [@conditions[:only]].flatten + path_is_in_rules?(rules) + end + + def path_is_not_in_except_conditions? + rules = [@conditions[:except]].flatten + !path_is_in_rules?(rules) + end + + def path_is_in_rules?(rules) + rules.any? do |pattern| + path_matches_pattern?(pattern) + end + end + + def path_matches_pattern?(pattern) + if pattern.is_a?(Regexp) + @request.path =~ pattern + else + @request.path[0, pattern.length] == pattern + end + end + + def request_path_is_pdf? + !!@request.path.match(%r{\.pdf$}) + end + def concat(accepts, type) (accepts || '').split(',').unshift(type).compact.join(',') end From cb70202eadd3bf3d2a4070dfc7805682c38d7f98 Mon Sep 17 00:00:00 2001 From: Martin Gotink Date: Sat, 22 Mar 2014 16:17:16 +0100 Subject: [PATCH 08/17] Set secret for cookie to prevent rack warnings --- spec/shrimp/middleware_spec.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spec/shrimp/middleware_spec.rb b/spec/shrimp/middleware_spec.rb index 24625d7..cedc990 100644 --- a/spec/shrimp/middleware_spec.rb +++ b/spec/shrimp/middleware_spec.rb @@ -17,7 +17,7 @@ def mock_app(options = { }, conditions = { }) } @middleware = Shrimp::Middleware.new(main_app, options, conditions) - @app = Rack::Session::Cookie.new(@middleware, :key => 'rack.session') + @app = Rack::Session::Cookie.new(@middleware, :key => 'rack.session', :secret => "secret") end From 2c3718223f4278f14df5f1687d2d5fb2c222a904 Mon Sep 17 00:00:00 2001 From: Martin Gotink Date: Sat, 22 Mar 2014 16:29:39 +0100 Subject: [PATCH 09/17] Set cache_ttl to 1 to prevent caching between tests --- spec/shrimp/middleware_spec.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spec/shrimp/middleware_spec.rb b/spec/shrimp/middleware_spec.rb index cedc990..afc034b 100644 --- a/spec/shrimp/middleware_spec.rb +++ b/spec/shrimp/middleware_spec.rb @@ -6,7 +6,7 @@ def app; def options { :margin => "1cm", :out_path => Dir.tmpdir, - :polling_offset => 10, :polling_interval => 1, :cache_ttl => 3600, + :polling_offset => 10, :polling_interval => 1, :cache_ttl => 1, :request_timeout => 1 } end From 17374d56fb1deca9d971b64fe937a3b982cd1d98 Mon Sep 17 00:00:00 2001 From: Martin Gotink Date: Sat, 22 Mar 2014 16:54:21 +0100 Subject: [PATCH 10/17] Remove unused concat method --- lib/shrimp/middleware.rb | 5 ----- 1 file changed, 5 deletions(-) diff --git a/lib/shrimp/middleware.rb b/lib/shrimp/middleware.rb index 6782b36..acf601f 100644 --- a/lib/shrimp/middleware.rb +++ b/lib/shrimp/middleware.rb @@ -75,7 +75,6 @@ def up_to_date?(ttl = 30) (Time.now - File.new(render_to).mtime) <= ttl end - def remove_rendering_flag @request.session["phantom-rendering"] ||={ } @request.session["phantom-rendering"].delete(render_to) @@ -135,10 +134,6 @@ def request_path_is_pdf? !!@request.path.match(%r{\.pdf$}) end - def concat(accepts, type) - (accepts || '').split(',').unshift(type).compact.join(',') - end - def reload_response(interval=1) body = <<-HTML.gsub(/[ \n]+/, ' ').strip From 7fe38b940a866e9c40325aa68e246bc0b65f21df Mon Sep 17 00:00:00 2001 From: Martin Gotink Date: Sat, 22 Mar 2014 17:20:16 +0100 Subject: [PATCH 11/17] Move responses from Middleware to own class --- lib/shrimp.rb | 1 + lib/shrimp/middleware.rb | 66 ++++------------------------------ lib/shrimp/response.rb | 59 ++++++++++++++++++++++++++++++ spec/shrimp/middleware_spec.rb | 11 ++++++ 4 files changed, 77 insertions(+), 60 deletions(-) create mode 100644 lib/shrimp/response.rb diff --git a/lib/shrimp.rb b/lib/shrimp.rb index f12222d..ab3972c 100644 --- a/lib/shrimp.rb +++ b/lib/shrimp.rb @@ -3,3 +3,4 @@ require 'shrimp/phantom' require 'shrimp/middleware' require 'shrimp/configuration' +require 'shrimp/response' diff --git a/lib/shrimp/middleware.rb b/lib/shrimp/middleware.rb index acf601f..8b1a981 100644 --- a/lib/shrimp/middleware.rb +++ b/lib/shrimp/middleware.rb @@ -17,32 +17,28 @@ def call(env) if File.size(render_to) == 0 File.delete(render_to) remove_rendering_flag - return error_response("PDF file invalid") + return Response.error("PDF file invalid") end - return ready_response if env['HTTP_X_REQUESTED_WITH'] + return Response.ready(@request.path) if env['HTTP_X_REQUESTED_WITH'] file = File.open(render_to, "rb") body = file.read file.close File.delete(render_to) if @options[:cache_ttl] == 0 remove_rendering_flag - response = [body] - headers = { } - headers["Content-Length"] = (body.respond_to?(:bytesize) ? body.bytesize : body.size).to_s - headers["Content-Type"] = "application/pdf" - [200, headers, response] + Response.file(body) else if rendering_in_progress? if rendering_timed_out? remove_rendering_flag - error_response("Rendering timeout") + Response.error("Rendering timeout") else - reload_response(@options[:polling_interval]) + Response.reload(@options[:polling_interval]) end else File.delete(render_to) if already_rendered? set_rendering_flag fire_phantom - reload_response(@options[:polling_offset]) + Response.reload(@options[:polling_offset]) end end else @@ -133,55 +129,5 @@ def path_matches_pattern?(pattern) def request_path_is_pdf? !!@request.path.match(%r{\.pdf$}) end - - def reload_response(interval=1) - body = <<-HTML.gsub(/[ \n]+/, ' ').strip - - - - -

Preparing pdf...

- - - HTML - headers = { } - headers["Content-Length"] = body.size.to_s - headers["Content-Type"] = "text/html" - headers["Retry-After"] = interval.to_s - - [503, headers, [body]] - end - - def ready_response - body = <<-HTML.gsub(/[ \n]+/, ' ').strip - - - - - PDF ready here - - - HTML - headers = { } - headers["Content-Length"] = body.size.to_s - headers["Content-Type"] = "text/html" - [200, headers, [body]] - end - - def error_response(message) - body = <<-HTML.gsub(/[ \n]+/, ' ').strip - - - - -

Sorry request timed out... #{message}

- - - HTML - headers = { } - headers["Content-Length"] = body.size.to_s - headers["Content-Type"] = "text/html" - [504, headers, [body]] - end end end diff --git a/lib/shrimp/response.rb b/lib/shrimp/response.rb new file mode 100644 index 0000000..4f8ba44 --- /dev/null +++ b/lib/shrimp/response.rb @@ -0,0 +1,59 @@ +module Shrimp + class Response + + def self.file(contents) + headers = {} + headers["Content-Length"] = (contents.respond_to?(:bytesize) ? contents.bytesize : contents.size).to_s + headers["Content-Type"] = "application/pdf" + + [200, headers, [contents]] + end + + def self.ready(path) + body = html_template("PDF ready here") + + headers = html_headers(body) + + [200, headers, [body]] + end + + def self.reload(interval=1) + on_load = "setTimeout(function(){ window.location.reload()}, #{interval * 1000});" + body = html_template("

Preparing pdf...

", on_load) + + headers = html_headers(body) + headers["Retry-After"] = interval.to_s + + [503, headers, [body]] + end + + def self.error(message) + body = html_template("

Sorry request timed out... #{message}

") + + headers = html_headers(body) + + [504, headers, [body]] + end + + private + + def self.html_template(content, on_load = '') + <<-HTML.gsub(/[ \n]+/, ' ').strip + + + + + #{content} + + + HTML + end + + def self.html_headers(body) + { + "Content-Length" => body.size.to_s, + "Content-Type" => "text/html" + } + end + end +end diff --git a/spec/shrimp/middleware_spec.rb b/spec/shrimp/middleware_spec.rb index afc034b..2dfe38e 100644 --- a/spec/shrimp/middleware_spec.rb +++ b/spec/shrimp/middleware_spec.rb @@ -63,6 +63,17 @@ def mock_app(options = { }, conditions = { }) last_response.status.should eq 200 last_response.body.should eq "Hello World" end + + it "should return HTTP 200 after rendering as AJAX request" do + mock_file = double(File, :read => "Hello World", :close => true, :mtime => Time.now) + File.should_receive(:'exists?').and_return true + File.should_receive(:'size').and_return 1000 + File.should_receive(:'new').and_return mock_file + + get '/test.pdf', nil, {'HTTP_X_REQUESTED_WITH' => 'XMLHttpRequest'} + + last_response.status.should eq 200 + end end describe "#render_to" do From c5c058d88400c1ddaa5088f65490b64ba3c4d948 Mon Sep 17 00:00:00 2001 From: Martin Gotink Date: Sat, 22 Mar 2014 21:16:42 +0100 Subject: [PATCH 12/17] Don't set the full PDF path in the session The complete path to the PDF being rendered is being set in the session, making this path visible to the public. Now only the PDF filename is being set in the session. --- lib/shrimp/middleware.rb | 22 +++++++++++++--------- spec/shrimp/middleware_spec.rb | 7 +++++++ 2 files changed, 20 insertions(+), 9 deletions(-) diff --git a/lib/shrimp/middleware.rb b/lib/shrimp/middleware.rb index 8b1a981..f7ad3f8 100644 --- a/lib/shrimp/middleware.rb +++ b/lib/shrimp/middleware.rb @@ -58,9 +58,12 @@ def phantom_request_url end def render_to - file_name = Digest::MD5.hexdigest(@request.url) + ".pdf" file_path = @options[:out_path] - "#{file_path}/#{file_name}" + "#{file_path}/#{render_file_name}" + end + + def render_file_name + Digest::MD5.hexdigest(@request.url) + ".pdf" end def already_rendered? @@ -71,23 +74,24 @@ def up_to_date?(ttl = 30) (Time.now - File.new(render_to).mtime) <= ttl end + def phantom_session + @request.session["phantom-rendering"] ||= { } + end + def remove_rendering_flag - @request.session["phantom-rendering"] ||={ } - @request.session["phantom-rendering"].delete(render_to) + phantom_session.delete(render_file_name) end def set_rendering_flag - @request.session["phantom-rendering"] ||={ } - @request.session["phantom-rendering"][render_to] = Time.now + phantom_session[render_file_name] = Time.now end def rendering_timed_out? - Time.now - @request.session["phantom-rendering"][render_to] > @options[:request_timeout] + Time.now - phantom_session[render_file_name] > @options[:request_timeout] end def rendering_in_progress? - @request.session["phantom-rendering"]||={ } - @request.session["phantom-rendering"][render_to] + phantom_session[render_file_name] end def render_as_pdf? diff --git a/spec/shrimp/middleware_spec.rb b/spec/shrimp/middleware_spec.rb index 2dfe38e..0e490d8 100644 --- a/spec/shrimp/middleware_spec.rb +++ b/spec/shrimp/middleware_spec.rb @@ -188,4 +188,11 @@ def mock_app(options = { }, conditions = { }) end end end + + describe "#phantom_session" do + it "uses the file name without a path as key" do + get 'http://example.org/test.pdf' + @middleware.send(:phantom_session).keys.first.should match(/\A[[:alnum:]]+\.pdf\z/) + end + end end From c34bb0ad6fb6f2f513648e56a922cdbf3c9ab71f Mon Sep 17 00:00:00 2001 From: Martin Gotink Date: Mon, 24 Mar 2014 09:26:11 +0100 Subject: [PATCH 13/17] Move conditions and condition validations to own classes --- lib/shrimp/conditions.rb | 21 +++++++++ lib/shrimp/middleware.rb | 38 +++------------- lib/shrimp/path_validator.rb | 32 ++++++++++++++ spec/shrimp/conditions_spec.rb | 38 ++++++++++++++++ spec/shrimp/path_validator_spec.rb | 70 ++++++++++++++++++++++++++++++ 5 files changed, 167 insertions(+), 32 deletions(-) create mode 100644 lib/shrimp/conditions.rb create mode 100644 lib/shrimp/path_validator.rb create mode 100644 spec/shrimp/conditions_spec.rb create mode 100644 spec/shrimp/path_validator_spec.rb diff --git a/lib/shrimp/conditions.rb b/lib/shrimp/conditions.rb new file mode 100644 index 0000000..9f8c2fa --- /dev/null +++ b/lib/shrimp/conditions.rb @@ -0,0 +1,21 @@ +require 'shrimp/path_validator' + +module Shrimp + class Conditions + def initialize(conditions = {}) + @conditions = conditions + end + + def path_is_valid?(path) + validator = PathValidator.new(path) + + if @conditions[:only] + validator.is_in_conditions?(@conditions[:only]) + elsif @conditions[:except] + validator.is_not_in_conditions?(@conditions[:except]) + else + true + end + end + end +end diff --git a/lib/shrimp/middleware.rb b/lib/shrimp/middleware.rb index f7ad3f8..e0426c8 100644 --- a/lib/shrimp/middleware.rb +++ b/lib/shrimp/middleware.rb @@ -1,9 +1,11 @@ +require 'shrimp/conditions' + module Shrimp class Middleware def initialize(app, options = { }, conditions = { }) @app = app @options = options - @conditions = conditions + @conditions = Conditions.new(conditions) @options[:polling_interval] ||= 1 @options[:polling_offset] ||= 1 @options[:cache_ttl] ||= 1 @@ -95,38 +97,10 @@ def rendering_in_progress? end def render_as_pdf? - return false unless request_path_is_pdf? - - if @conditions[:only] - path_is_in_only_conditions? - elsif @conditions[:except] - path_is_not_in_except_conditions? - else - true - end - end - - def path_is_in_only_conditions? - rules = [@conditions[:only]].flatten - path_is_in_rules?(rules) - end - - def path_is_not_in_except_conditions? - rules = [@conditions[:except]].flatten - !path_is_in_rules?(rules) - end - - def path_is_in_rules?(rules) - rules.any? do |pattern| - path_matches_pattern?(pattern) - end - end - - def path_matches_pattern?(pattern) - if pattern.is_a?(Regexp) - @request.path =~ pattern + if request_path_is_pdf? + @conditions.path_is_valid? @request.path else - @request.path[0, pattern.length] == pattern + false end end diff --git a/lib/shrimp/path_validator.rb b/lib/shrimp/path_validator.rb new file mode 100644 index 0000000..85e340a --- /dev/null +++ b/lib/shrimp/path_validator.rb @@ -0,0 +1,32 @@ +module Shrimp + class PathValidator + def initialize(path) + @path = path + end + + def is_in_conditions?(conditions) + rules = [conditions].flatten + is_in_rules?(rules) + end + + def is_not_in_conditions?(conditions) + !is_in_conditions?(conditions) + end + + private + + def is_in_rules?(rules) + rules.any? do |pattern| + matches_pattern?(pattern) + end + end + + def matches_pattern?(pattern) + if pattern.is_a?(Regexp) + @path =~ pattern + else + @path[0, pattern.length] == pattern + end + end + end +end diff --git a/spec/shrimp/conditions_spec.rb b/spec/shrimp/conditions_spec.rb new file mode 100644 index 0000000..0407c4b --- /dev/null +++ b/spec/shrimp/conditions_spec.rb @@ -0,0 +1,38 @@ +require "spec_helper" + +describe Shrimp::Conditions do + + describe "#path_is_valid?" do + context "with only conditions set" do + let(:conditions) { Shrimp::Conditions.new({:only => "/test"}) } + + it "returns true when the path is in only conditions" do + expect(conditions.path_is_valid?("/test/file.pdf")).to eq true + end + end + + context "with except conditions set" do + let(:conditions) { Shrimp::Conditions.new({:except => "/test"}) } + + it "returns false when the path is in except conditions" do + expect(conditions.path_is_valid?("/test/file.pdf")).to eq false + end + end + + context "with both only and except conditions set" do + let(:conditions) { Shrimp::Conditions.new({:only => "/test", :except => "/test"}) } + + it "uses the only conditions" do + expect(conditions.path_is_valid?("/test/file.pdf")).to eq true + end + end + + context "with no conditions set" do + let(:conditions) { Shrimp::Conditions.new } + + it "returns true" do + expect(conditions.path_is_valid?("/test/file.pdf")).to eq true + end + end + end +end diff --git a/spec/shrimp/path_validator_spec.rb b/spec/shrimp/path_validator_spec.rb new file mode 100644 index 0000000..31cac3d --- /dev/null +++ b/spec/shrimp/path_validator_spec.rb @@ -0,0 +1,70 @@ +require 'spec_helper' + +describe Shrimp::PathValidator do + + let(:validator) { Shrimp::PathValidator.new('/test/file.pdf') } + + describe "#is_in_conditions" do + context "when conditions is an array of regular expression" do + it "returns true when the path is in the conditions" do + expect(validator.is_in_conditions?([%r[^/public], %r[^/test]])).to eq true + end + + it "returns false when the path is not in the conditions" do + expect(validator.is_in_conditions?([%r[^/public], %r[^/other]])).to eq false + end + end + + context "when conditions is an array of strings" do + it "returns true when the path is in the conditions" do + expect(validator.is_in_conditions?(%w{/public /test})).to eq true + end + + it "returns false when the path is not in the conditions" do + expect(validator.is_in_conditions?([%w{/public /other}])).to eq false + end + end + + context "when conditions is a single string" do + it "returns true when the path is in the conditions" do + expect(validator.is_in_conditions?('/test')).to eq true + end + + it "returns false when the path is not in the conditions" do + expect(validator.is_in_conditions?('/public')).to eq false + end + end + end + + describe "#is_not_in_conditions" do + context "when conditions is an array of regular expression" do + it "returns false when the path is in the conditions" do + expect(validator.is_not_in_conditions?([%r[^/public], %r[^/test]])).to eq false + end + + it "returns true when the path is not in the conditions" do + expect(validator.is_not_in_conditions?([%r[^/public], %r[^/other]])).to eq true + end + end + + context "when conditions is an array of strings" do + it "returns false when the path is in the conditions" do + expect(validator.is_not_in_conditions?(%w{/public /test})).to eq false + end + + it "returns true when the path is not in the conditions" do + expect(validator.is_not_in_conditions?([%w{/public /other}])).to eq true + end + end + + context "when conditions is a single string" do + it "returns false when the path is in the conditions" do + expect(validator.is_not_in_conditions?('/test')).to eq false + end + + it "returns true when the path is not in the conditions" do + expect(validator.is_not_in_conditions?('/public')).to eq true + end + end + end +end From 01a2f3e327002d001e5f6012aa2b1b273a0dc075 Mon Sep 17 00:00:00 2001 From: Martin Gotink Date: Mon, 24 Mar 2014 14:27:39 +0100 Subject: [PATCH 14/17] Refactor call method --- lib/shrimp/middleware.rb | 81 ++++++++++++++++++++++------------ spec/shrimp/middleware_spec.rb | 4 +- 2 files changed, 55 insertions(+), 30 deletions(-) diff --git a/lib/shrimp/middleware.rb b/lib/shrimp/middleware.rb index e0426c8..a1163d3 100644 --- a/lib/shrimp/middleware.rb +++ b/lib/shrimp/middleware.rb @@ -15,34 +15,7 @@ def initialize(app, options = { }, conditions = { }) def call(env) @request = Rack::Request.new(env) if render_as_pdf? - if already_rendered? && (up_to_date?(@options[:cache_ttl]) || @options[:cache_ttl] == 0) - if File.size(render_to) == 0 - File.delete(render_to) - remove_rendering_flag - return Response.error("PDF file invalid") - end - return Response.ready(@request.path) if env['HTTP_X_REQUESTED_WITH'] - file = File.open(render_to, "rb") - body = file.read - file.close - File.delete(render_to) if @options[:cache_ttl] == 0 - remove_rendering_flag - Response.file(body) - else - if rendering_in_progress? - if rendering_timed_out? - remove_rendering_flag - Response.error("Rendering timeout") - else - Response.reload(@options[:polling_interval]) - end - else - File.delete(render_to) if already_rendered? - set_rendering_flag - fire_phantom - Response.reload(@options[:polling_offset]) - end - end + render_pdf else @app.call(env) end @@ -50,6 +23,58 @@ def call(env) private + def render_pdf + if pdf_ready? + send_pdf + elsif rendering_in_progress? + wait_for_rendering + else + start_rendering + end + end + + def pdf_ready? + already_rendered? && (up_to_date?(@options[:cache_ttl]) || @options[:cache_ttl] == 0) + end + + def send_pdf + if File.zero?(render_to) + File.delete(render_to) + remove_rendering_flag + return Response.error("PDF file invalid") + end + + return Response.ready(@request.path) if @request.xhr? + + body = read_pdf_contents + File.delete(render_to) if @options[:cache_ttl] == 0 + remove_rendering_flag + Response.file(body) + end + + def read_pdf_contents + file = File.open(render_to, "rb") + body = file.read + file.close + body + end + + def wait_for_rendering + if rendering_timed_out? + remove_rendering_flag + Response.error("Rendering timeout") + else + Response.reload(@options[:polling_interval]) + end + end + + def start_rendering + File.delete(render_to) if already_rendered? + set_rendering_flag + fire_phantom + Response.reload(@options[:polling_offset]) + end + # Private: start phantom rendering in a separate process def fire_phantom Process::detach fork { Phantom.new(phantom_request_url, @options, @request.cookies).to_pdf(render_to) } diff --git a/spec/shrimp/middleware_spec.rb b/spec/shrimp/middleware_spec.rb index 0e490d8..ce6185f 100644 --- a/spec/shrimp/middleware_spec.rb +++ b/spec/shrimp/middleware_spec.rb @@ -56,7 +56,7 @@ def mock_app(options = { }, conditions = { }) it "should return a pdf with 200 after rendering" do mock_file = double(File, :read => "Hello World", :close => true, :mtime => Time.now) File.should_receive(:'exists?').and_return true - File.should_receive(:'size').and_return 1000 + File.should_receive(:'zero?').and_return false File.should_receive(:'open').and_return mock_file File.should_receive(:'new').and_return mock_file get '/test.pdf' @@ -67,7 +67,7 @@ def mock_app(options = { }, conditions = { }) it "should return HTTP 200 after rendering as AJAX request" do mock_file = double(File, :read => "Hello World", :close => true, :mtime => Time.now) File.should_receive(:'exists?').and_return true - File.should_receive(:'size').and_return 1000 + File.should_receive(:'zero?').and_return false File.should_receive(:'new').and_return mock_file get '/test.pdf', nil, {'HTTP_X_REQUESTED_WITH' => 'XMLHttpRequest'} From 2fbdcd953e885e0e164632f95453267c1bb84098 Mon Sep 17 00:00:00 2001 From: Martin Gotink Date: Mon, 24 Mar 2014 15:59:51 +0100 Subject: [PATCH 15/17] Move request specific code to new PhantomRequest class --- lib/shrimp/middleware.rb | 46 ++++---------- lib/shrimp/phantom_request.rb | 41 +++++++++++++ spec/shrimp/middleware_spec.rb | 20 +----- spec/shrimp/phantom_request_spec.rb | 95 +++++++++++++++++++++++++++++ 4 files changed, 150 insertions(+), 52 deletions(-) create mode 100644 lib/shrimp/phantom_request.rb create mode 100644 spec/shrimp/phantom_request_spec.rb diff --git a/lib/shrimp/middleware.rb b/lib/shrimp/middleware.rb index a1163d3..dc9d53f 100644 --- a/lib/shrimp/middleware.rb +++ b/lib/shrimp/middleware.rb @@ -1,4 +1,5 @@ require 'shrimp/conditions' +require 'shrimp/phantom_request' module Shrimp class Middleware @@ -13,7 +14,8 @@ def initialize(app, options = { }, conditions = { }) end def call(env) - @request = Rack::Request.new(env) + @request = PhantomRequest.new(env) + if render_as_pdf? render_pdf else @@ -26,7 +28,7 @@ def call(env) def render_pdf if pdf_ready? send_pdf - elsif rendering_in_progress? + elsif @request.rendering_in_progress? wait_for_rendering else start_rendering @@ -40,7 +42,7 @@ def pdf_ready? def send_pdf if File.zero?(render_to) File.delete(render_to) - remove_rendering_flag + @request.remove_rendering_flag return Response.error("PDF file invalid") end @@ -48,7 +50,7 @@ def send_pdf body = read_pdf_contents File.delete(render_to) if @options[:cache_ttl] == 0 - remove_rendering_flag + @request.remove_rendering_flag Response.file(body) end @@ -61,7 +63,7 @@ def read_pdf_contents def wait_for_rendering if rendering_timed_out? - remove_rendering_flag + @request.remove_rendering_flag Response.error("Rendering timeout") else Response.reload(@options[:polling_interval]) @@ -70,18 +72,14 @@ def wait_for_rendering def start_rendering File.delete(render_to) if already_rendered? - set_rendering_flag + @request.set_rendering_flag fire_phantom Response.reload(@options[:polling_offset]) end # Private: start phantom rendering in a separate process def fire_phantom - Process::detach fork { Phantom.new(phantom_request_url, @options, @request.cookies).to_pdf(render_to) } - end - - def phantom_request_url - @request.url.sub(%r{\.pdf(\?.*)?$}, '\1') + Process::detach fork { Phantom.new(@request.phantom_request_url, @options, @request.cookies).to_pdf(render_to) } end def render_to @@ -90,7 +88,7 @@ def render_to end def render_file_name - Digest::MD5.hexdigest(@request.url) + ".pdf" + "#{@request.session_key}.pdf" end def already_rendered? @@ -101,36 +99,16 @@ def up_to_date?(ttl = 30) (Time.now - File.new(render_to).mtime) <= ttl end - def phantom_session - @request.session["phantom-rendering"] ||= { } - end - - def remove_rendering_flag - phantom_session.delete(render_file_name) - end - - def set_rendering_flag - phantom_session[render_file_name] = Time.now - end - def rendering_timed_out? - Time.now - phantom_session[render_file_name] > @options[:request_timeout] - end - - def rendering_in_progress? - phantom_session[render_file_name] + @request.rendering_timeout? @options[:request_timeout] end def render_as_pdf? - if request_path_is_pdf? + if @request.path_is_pdf? @conditions.path_is_valid? @request.path else false end end - - def request_path_is_pdf? - !!@request.path.match(%r{\.pdf$}) - end end end diff --git a/lib/shrimp/phantom_request.rb b/lib/shrimp/phantom_request.rb new file mode 100644 index 0000000..e249cf1 --- /dev/null +++ b/lib/shrimp/phantom_request.rb @@ -0,0 +1,41 @@ +require 'digest/md5' + +module Shrimp + class PhantomRequest < Rack::Request + + def session_key + Digest::MD5.hexdigest(url) + end + + def path_is_pdf? + !!path.match(%r{\.pdf$}) + end + + def phantom_request_url + url.sub(%r{\.pdf(\?.*)?$}, '\1') + end + + def remove_rendering_flag + phantom_session.delete(session_key) + end + + def set_rendering_flag + phantom_session[session_key] = Time.now + end + + def rendering_timeout?(timeout) + Time.now - phantom_session[session_key] > timeout + end + + def rendering_in_progress? + !!phantom_session[session_key] + end + + private + + def phantom_session + session["phantom-rendering"] ||= {} + end + + end +end diff --git a/spec/shrimp/middleware_spec.rb b/spec/shrimp/middleware_spec.rb index ce6185f..ea3149d 100644 --- a/spec/shrimp/middleware_spec.rb +++ b/spec/shrimp/middleware_spec.rb @@ -173,26 +173,10 @@ def mock_app(options = { }, conditions = { }) end end - describe "#phantom_request_url" do - context "when the URL has no parameters" do - it "removes the .pdf extension" do - get 'http://example.org/test.pdf' - @middleware.send(:phantom_request_url).should eq 'http://example.org/test' - end - end - - context "when the URL has parameters" do - it "it preserves the parameters but removes the .pdf extension" do - get 'http://example.org/test.pdf?x=42' - @middleware.send(:phantom_request_url).should eq 'http://example.org/test?x=42' - end - end - end - - describe "#phantom_session" do + describe "#render_file_name" do it "uses the file name without a path as key" do get 'http://example.org/test.pdf' - @middleware.send(:phantom_session).keys.first.should match(/\A[[:alnum:]]+\.pdf\z/) + @middleware.send(:render_file_name).should match(/\A[[:alnum:]]+\.pdf\z/) end end end diff --git a/spec/shrimp/phantom_request_spec.rb b/spec/shrimp/phantom_request_spec.rb new file mode 100644 index 0000000..7f7cc22 --- /dev/null +++ b/spec/shrimp/phantom_request_spec.rb @@ -0,0 +1,95 @@ +require "spec_helper" + +describe Shrimp::PhantomRequest do + + let(:phantom_request) { Shrimp::PhantomRequest.new({}) } + + describe "#session_key" do + it "returns a hash of the request URL" do + phantom_request.should_receive(:url).and_return('/test/file.pdf') + expect(phantom_request.session_key).to match(/\A[[:alnum:]]+\z/) + end + end + + describe "#phantom_request_url" do + context "when the URL has no parameters" do + it "removes the .pdf extension" do + phantom_request.should_receive(:url).and_return('http://example.org/test.pdf') + expect(phantom_request.phantom_request_url).to eq 'http://example.org/test' + end + end + + context "when the URL has parameters" do + it "it preserves the parameters but removes the .pdf extension" do + phantom_request.should_receive(:url).and_return('http://example.org/test.pdf?x=42') + expect(phantom_request.phantom_request_url).to eq 'http://example.org/test?x=42' + end + end + end + + describe "#path_is_pdf?" do + it "returns true if the path has a .pdf extension" do + phantom_request.should_receive(:path).and_return('/test/file.pdf') + expect(phantom_request.path_is_pdf?).to eq true + end + + it "returns false if the path has no .pdf extension" do + phantom_request.should_receive(:path).and_return('/test/file.pdf.exe') + expect(phantom_request.path_is_pdf?).to eq false + end + end + + describe "#set_rendering_flag" do + it "set the rendering flag" do + phantom_request.remove_rendering_flag + phantom_request.set_rendering_flag + + key = phantom_request.session_key + expect(phantom_request.send(:phantom_session)[key]).to_not be_nil + end + end + + describe "#remove_rendering_flag" do + it "removes the rendering flag" do + phantom_request.set_rendering_flag + phantom_request.remove_rendering_flag + + key = phantom_request.session_key + expect(phantom_request.send(:phantom_session)[key]).to be_nil + end + end + + describe "#rendering_in_progress?" do + it "returns true when rendering is active" do + phantom_request.remove_rendering_flag + phantom_request.set_rendering_flag + + expect(phantom_request.rendering_in_progress?).to eq true + end + + it "returns false when rendering is not active" do + phantom_request.set_rendering_flag + phantom_request.remove_rendering_flag + + expect(phantom_request.rendering_in_progress?).to eq false + end + end + + describe "#rendering_timeout?" do + before do + phantom_request.set_rendering_flag + + now = Time.now + Time.should_receive(:now).and_return (now + 10) + end + + it "returns true when rendering has timed out" do + expect(phantom_request.rendering_timeout?(5)).to eq true + end + + it "returns false when rendering is not timed out" do + expect(phantom_request.rendering_timeout?(15)).to eq false + end + end + +end From 5fbcb1a05614098394b5e90f2d0eb37485b95031 Mon Sep 17 00:00:00 2001 From: Martin Gotink Date: Tue, 25 Mar 2014 13:35:17 +0100 Subject: [PATCH 16/17] Explain that query strings can be used in a URL --- README.md | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 240efd1..7ae0491 100644 --- a/README.md +++ b/README.md @@ -152,7 +152,7 @@ To include some fancy Ajax stuff with jquery ```js - var url = '/my_page.pdf' + var url = '/my_page.pdf'; var statusCodes = { 200: function() { return window.location.assign(url); @@ -178,6 +178,12 @@ To include some fancy Ajax stuff with jquery ``` +The URL in the above example can also contain a query string: + +```js +var url = '/my_page.pdf?answer=42'; +``` + ## Contributing 1. Fork it From 27462b4c790f5ac979713ac2c24346c2ec8e5af4 Mon Sep 17 00:00:00 2001 From: Martin Gotink Date: Tue, 25 Mar 2014 13:51:07 +0100 Subject: [PATCH 17/17] Log a nicer error when a timeout occurs --- lib/shrimp/rasterize.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/shrimp/rasterize.js b/lib/shrimp/rasterize.js index ddcb5b0..88d127a 100644 --- a/lib/shrimp/rasterize.js +++ b/lib/shrimp/rasterize.js @@ -26,7 +26,7 @@ function print_usage() { } window.setTimeout(function () { - error("Shit's being weird no result within: " + time_out + "ms"); + error("Timeout, no result within: " + time_out + "ms"); }, time_out); function renderUrl(url, output, options) {