Skip to content

Commit cd5c055

Browse files
committed
Prevent empty filenames when attempting to download assets without a Content-Disposition header
1 parent 7a2a15b commit cd5c055

File tree

2 files changed

+178
-4
lines changed

2 files changed

+178
-4
lines changed

planet/api/utils.py

Lines changed: 136 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,13 @@
1616
from datetime import datetime
1717
from . import exceptions
1818
import json
19+
import mimetypes
1920
import os
21+
import random
2022
import re
23+
import string
2124
import threading
25+
import urlparse
2226
from ._fatomic import atomic_open
2327

2428
_ISO_FMT = '%Y-%m-%dT%H:%M:%S.%f+00:00'
@@ -89,11 +93,138 @@ def check_status(response):
8993

9094

9195
def get_filename(response):
    """Pick the best available filename for a downloaded response.

    Three sources are consulted in order and the first non-empty result
    wins: the ``Content-Disposition`` header, the last path segment of the
    response URL, and finally a randomly generated ``planet-XXXXXXXX`` name
    whose extension is guessed from the ``Content-Type`` header.

    >>> import requests
    >>> from planet.api import utils
    >>> response = requests.Response()
    >>> response.headers = {
    ...     'content-type': 'image/tiff',
    ...     'content-disposition': 'attachment; filename="open_california.tif"'
    ... }
    >>> response.url = 'https://planet.com/path/to/example.tif?foo=f6f1'
    >>> print(utils.get_filename(response))
    open_california.tif
    >>> response = requests.Response()
    >>> response.headers = {'content-type': 'image/tiff'}
    >>> response.url = 'https://planet.com/path/to/example.tif?foo=f6f1'
    >>> print(utils.get_filename(response))
    example.tif
    >>> response = requests.Response()
    >>> response.headers = {'content-type': 'image/tiff'}
    >>> response.url = 'https://planet.com/path/to/oops/'
    >>> print(utils.get_filename(response))  #doctest:+SKIP
    planet-bFL6pwki.tif
    >>>

    :param response: An HTTP response.
    :type response: :py:class:`requests.Response`
    :returns: a filename (i.e. ``basename``)
    :rtype: str
    """
    # Preferred source: the name the server explicitly suggested.
    from_headers = get_filename_from_headers(response.headers)
    if from_headers:
        return from_headers

    # Next best: whatever the URL path ends with.
    from_url = get_filename_from_url(response.url)
    if from_url:
        return from_url

    # Last resort: invent a name so the caller never gets an empty one.
    return get_random_filename(response.headers.get('content-type'))
147+
148+
149+
def get_filename_from_headers(headers):
    """Get a filename from the Content-Disposition header, if available.

    Only the plain ``filename=`` parameter is read. Quoted values are taken
    verbatim up to the closing quote; unquoted values stop at the next ``;``
    so trailing parameters are not swallowed. The extended ``filename*=``
    form is not decoded and is ignored. Because the header is controlled by
    the server, any directory components are stripped so the result is
    always a bare basename.

    >>> from planet.api import utils
    >>> cd = 'attachment; filename="open_california.tif"'
    >>> print(utils.get_filename_from_headers({'content-disposition': cd}))
    open_california.tif
    >>> cd = 'attachment; filename=open_california.tif; size=57350256'
    >>> print(utils.get_filename_from_headers({'content-disposition': cd}))
    open_california.tif
    >>> print(utils.get_filename_from_headers({'content-type': 'image/tiff'}))
    None
    >>>

    :param headers dict: a ``dict`` of response headers
    :returns: a filename (i.e. ``basename``)
    :rtype: str or None
    """
    cd = headers.get('content-disposition', '') or ''
    # Group 1: quoted value (closing quote optional, to tolerate truncated
    # headers). Group 2: unquoted value, terminated by ';' or a quote.
    # ``filename*=`` does not match because '*' precedes the '='.
    match = re.search(r'\bfilename\s*=\s*(?:"([^"]*)"?|([^";]+))',
                      cd, re.IGNORECASE)
    if not match:
        return None
    raw = match.group(1) if match.group(1) is not None else match.group(2)
    # Keep only the final path component, treating both separator styles
    # alike, so a hostile or sloppy header cannot smuggle in a directory.
    name = raw.replace('\\', '/').rsplit('/', 1)[-1].strip()
    if name in ('', '.', '..'):
        return None
    return name
179+
180+
181+
def get_filename_from_url(url):
    """Get a filename from a URL.

    The filename is the last segment of the URL's path; the query string
    and fragment are ignored. URLs whose path is empty, ends in ``/``, or
    ends in a ``.`` / ``..`` segment yield ``None``.

    >>> from planet.api import utils
    >>> urls = [
    ...     'https://planet.com/',
    ...     'https://planet.com/path/to/',
    ...     'https://planet.com/path/to/example.tif',
    ...     'https://planet.com/path/to/example.tif?foo=f6f1&bar=baz',
    ...     'https://planet.com/path/to/example.tif?foo=f6f1&bar=baz#quux'
    ... ]
    >>> for url in urls:
    ...     print('{} -> {}'.format(url, utils.get_filename_from_url(url)))
    ...
    https://planet.com/ -> None
    https://planet.com/path/to/ -> None
    https://planet.com/path/to/example.tif -> example.tif
    https://planet.com/path/to/example.tif?foo=f6f1&bar=baz -> example.tif
    https://planet.com/path/to/example.tif?foo=f6f1&bar=baz#quux -> example.tif
    >>>

    :param url str: the URL to inspect
    :returns: a filename (i.e. ``basename``)
    :rtype: str or None
    """
    # The parser lives in different modules on Python 2 and Python 3;
    # resolve it here so this function works on either.
    try:
        from urllib.parse import urlparse as _urlparse
    except ImportError:
        from urlparse import urlparse as _urlparse

    path = _urlparse(url).path
    name = path.rsplit('/', 1)[-1]
    # '.' and '..' are directory references, not usable filenames.
    if name in ('.', '..'):
        return None
    return name or None
208+
209+
210+
def get_random_filename(content_type=None):
    """Get a pseudo-random, Planet-looking filename.

    The name is ``planet-`` followed by eight random ASCII letters/digits.
    When ``content_type`` maps to a known extension it is appended; any
    parameters on the content type (e.g. ``; charset=utf-8``) are ignored
    for that lookup. The exact extension chosen for a type (``.tif`` vs
    ``.tiff``) is decided by :py:mod:`mimetypes` and may vary by platform.

    >>> from planet.api import utils
    >>> print(utils.get_random_filename())  #doctest:+SKIP
    planet-61FPnh7K
    >>> print(utils.get_random_filename('image/tiff'))  #doctest:+SKIP
    planet-V8ELYxy5.tif
    >>>

    :param content_type str: optional value of a ``Content-Type`` header
    :returns: a filename (i.e. ``basename``)
    :rtype: str
    """
    # mimetypes only recognises the bare "type/subtype", so drop parameters.
    mime_type = (content_type or '').split(';', 1)[0].strip()
    extension = mimetypes.guess_extension(mime_type) or ''
    characters = string.ascii_letters + string.digits
    # Draw each character independently (with replacement) so repeated
    # characters are possible and the name space is as large as it can be.
    letters = ''.join(random.choice(characters) for _ in range(8))
    return 'planet-{}{}'.format(letters, extension)
97228

98229

99230
def write_to_file(directory=None, callback=None, overwrite=True):
@@ -112,6 +243,7 @@ def write_to_file(directory=None, callback=None, overwrite=True):
112243
write progress.
113244
:param overwrite bool: Overwrite any existing files. Defaults to True.
114245
'''
246+
115247
def writer(body):
116248
file = os.path.join(directory or '.', body.name)
117249
if overwrite or not os.path.exists(file):

tests/test_utils.py

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
from mock import Mock
1515
import pytest
1616
from datetime import datetime
17+
import re
1718
from planet.api import utils
1819
from planet.api import exceptions
1920
from _common import read_fixture
@@ -110,3 +111,44 @@ def test_write_to_file(tmpdir):
110111
utils.write_to_file(str(tmpdir), callback=callback, overwrite=False)(body)
111112
assert body.write.call_count == 1
112113
assert callback.call_args[1]['skip'] == body
114+
115+
116+
@pytest.mark.parametrize(['headers', 'expected'], [
    (
        # Content-Disposition present: its filename is returned.
        {
            'date': 'Thu, 14 Feb 2019 16:13:26 GMT',
            'last-modified': 'Wed, 22 Nov 2017 17:22:31 GMT',
            'accept-ranges': 'bytes',
            'content-type': 'image/tiff',
            'content-length': '57350256',
            'content-disposition': 'attachment; filename="open_california.tif"'
        },
        'open_california.tif',
    ),
    (
        # Same headers without Content-Disposition: nothing to extract.
        {
            'date': 'Thu, 14 Feb 2019 16:13:26 GMT',
            'last-modified': 'Wed, 22 Nov 2017 17:22:31 GMT',
            'accept-ranges': 'bytes',
            'content-type': 'image/tiff',
            'content-length': '57350256'
        },
        None,
    ),
    # No headers at all.
    ({}, None),
])
def test_get_filename_from_headers(headers, expected):
    """The header lookup yields the suggested name, or None without one."""
    actual = utils.get_filename_from_headers(headers)
    assert actual == expected
136+
137+
138+
@pytest.mark.parametrize(['url', 'expected'], [
    # Paths that end in a slash carry no filename.
    ('https://planet.com/', None),
    ('https://planet.com/path/to/', None),
    # Query strings and fragments must not leak into the name.
    ('https://planet.com/path/to/example.tif', 'example.tif'),
    ('https://planet.com/path/to/example.tif?foo=f6f1&bar=baz', 'example.tif'),
    ('https://planet.com/path/to/example.tif?foo=f6f1#quux', 'example.tif'),
])
def test_get_filename_from_url(url, expected):
    """The URL lookup yields the last path segment, or None if empty."""
    actual = utils.get_filename_from_url(url)
    assert actual == expected
147+
148+
149+
@pytest.mark.parametrize('content_type,check', [
    # Without a content type the name is just the prefix plus 8 characters.
    (None, lambda x: re.match(r'^planet-[a-z0-9]{8}$', x, re.I) is not None),
    # The extension mimetypes picks for image/tiff differs between Python
    # versions/platforms ('.tif' or '.tiff'), so accept either spelling.
    ('image/tiff', lambda x: x.endswith(('.tif', '.tiff'))),
])
def test_get_random_filename(content_type, check):
    """Random names have the expected shape and a type-derived extension."""
    assert check(utils.get_random_filename(content_type))

0 commit comments

Comments
 (0)