Skip to content

Commit 1d72dce

Browse files
committed
initial commit:
1 parent 321ba98 commit 1d72dce

File tree

2 files changed

+354
-130
lines changed

2 files changed

+354
-130
lines changed

data_discovery/data_discovery.ipynb

Lines changed: 194 additions & 130 deletions
Original file line numberDiff line numberDiff line change
@@ -1,79 +1,44 @@
11
{
22
"cells": [
33
{
4-
"cell_type": "code",
5-
"execution_count": 2,
6-
"metadata": {},
7-
"outputs": [],
8-
"source": [
9-
"import getpass\n",
10-
"import socket\n",
11-
"import requests\n",
12-
"import json\n",
13-
"import zipfile\n",
14-
"import os\n",
15-
"import io\n",
16-
"import s3fs"
17-
]
18-
},
19-
{
20-
"cell_type": "code",
21-
"execution_count": 7,
22-
"metadata": {},
23-
"outputs": [],
24-
"source": [
25-
"with open('.cred') as json_file: \n",
26-
" data = json.load(json_file)"
27-
]
28-
},
29-
{
30-
"cell_type": "code",
31-
"execution_count": 8,
4+
"cell_type": "markdown",
325
"metadata": {},
33-
"outputs": [],
346
"source": [
35-
"# Request token from Common Metadata Repository using Earthdata credentials\n",
36-
"token_api_url = 'https://api.echo.nasa.gov/echo-rest/tokens'\n",
37-
"hostname = socket.gethostname()\n",
38-
"ip = socket.gethostbyname(hostname)\n",
7+
"### Script to acquire ICESat-2 data into your Pangeo workspace for initial tutorial development\n",
398
"\n",
40-
"data = {\n",
41-
" 'token': {\n",
42-
" 'username': data['uid'],\n",
43-
" 'password': data['pwd'],\n",
44-
" 'client_id': 'NSIDC_client_id',\n",
45-
" 'user_ip_address': ip\n",
46-
" }\n",
47-
"}\n",
48-
"headers={'Accept': 'application/json'}\n",
49-
"response = requests.post(token_api_url, json=data, headers=headers)\n",
50-
"token = json.loads(response.content)['token']['id']"
51-
]
52-
},
53-
{
54-
"cell_type": "code",
55-
"execution_count": 14,
56-
"metadata": {},
57-
"outputs": [],
58-
"source": [
59-
"response = requests.get(\"https://n5eil02u.ecs.nsidc.org/egi/request?short_name=ATL03&version=205&temporal=2018-10-17T00:00:00,2018-10-17T23:59:59&bounding_box=-115,35,-99,42&agent=NO&token={}&page_size=1&email=arendta@uw.edu\".format(token)) "
9+
"* Scott Henderson, UW\n",
10+
"* Anthony Arendt, UW"
6011
]
6112
},
6213
{
6314
"cell_type": "code",
64-
"execution_count": 15,
15+
"execution_count": 2,
6516
"metadata": {},
6617
"outputs": [],
6718
"source": [
68-
"with zipfile.ZipFile(io.BytesIO(response.content)) as z:\n",
69-
" z.extractall(os.getcwd())"
19+
"import s3fs\n",
20+
"import xarray as xr\n",
21+
"import h5py"
7022
]
7123
},
7224
{
7325
"cell_type": "code",
74-
"execution_count": null,
26+
"execution_count": 11,
7527
"metadata": {},
76-
"outputs": [],
28+
"outputs": [
29+
{
30+
"data": {
31+
"text/plain": [
32+
"['pangeo-data-upload-oregon/icesat2/atl03/ATL03_20181017101822_02870106_205_01.h5',\n",
33+
" 'pangeo-data-upload-oregon/icesat2/atl03/ATL03_20181017101822_02870106_205_01.iso.xml',\n",
34+
" 'pangeo-data-upload-oregon/icesat2/atl03/readme.txt']"
35+
]
36+
},
37+
"execution_count": 11,
38+
"metadata": {},
39+
"output_type": "execute_result"
40+
}
41+
],
7742
"source": [
7843
"# Connect to bucket\n",
7944
"import s3fs\n",
@@ -86,107 +51,206 @@
8651
},
8752
{
8853
"cell_type": "code",
89-
"execution_count": null,
54+
"execution_count": 12,
9055
"metadata": {},
9156
"outputs": [],
9257
"source": [
9358
"# Download an HDF5 to home directory\n",
94-
"filename = 'ATL06_20181019051247_03140110_203_01.h5'\n",
59+
"filename = 'ATL03_20181017101822_02870106_205_01.h5'\n",
9560
"data = dataDir + '/' + filename\n",
9661
"fs.get(data, filename)"
9762
]
9863
},
64+
{
65+
"cell_type": "markdown",
66+
"metadata": {},
67+
"source": [
68+
"### Try xarray\n",
69+
"\n",
70+
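"xarray can open the file, but by default it only reads the root group: we get two coordinates and the global attributes but no data variables, because the rest of the file is organised into nested HDF5 groups."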
71+
]
72+
},
9973
{
10074
"cell_type": "code",
101-
"execution_count": 7,
75+
"execution_count": 19,
76+
"metadata": {},
77+
"outputs": [
78+
{
79+
"name": "stdout",
80+
"output_type": "stream",
81+
"text": [
82+
"<xarray.Dataset>\n",
83+
"Dimensions: (ds_surf_type: 5, ds_xyz: 3)\n",
84+
"Coordinates:\n",
85+
" * ds_surf_type (ds_surf_type) int32 1 2 3 4 5\n",
86+
" * ds_xyz (ds_xyz) int32 1 2 3\n",
87+
"Data variables:\n",
88+
" *empty*\n",
89+
"Attributes:\n",
90+
" granule_type: ATL03\n",
91+
" short_name: ATL03\n",
92+
" level: L2\n",
93+
" description: Photon heights determined by ATBD Alg...\n",
94+
" contributor_name: Thomas E Neumann (thomas.neumann@nasa...\n",
95+
" contributor_role: Instrument Engineer, Investigator, Pr...\n",
96+
" Conventions: CF-1.6\n",
97+
" date_type: UTC\n",
98+
" featureType: trajectory\n",
99+
" geospatial_lat_units: degrees_north\n",
100+
" geospatial_lon_units: degrees_east\n",
101+
" identifier_product_doi: 10.5067/ATLAS/ATL03.001\n",
102+
" identifier_product_doi_authority: http://dx.doi.org\n",
103+
" identifier_product_type: ATL03\n",
104+
" license: Data may not be reproduced or distrib...\n",
105+
" naming_authority: http://dx.doi.org\n",
106+
" spatial_coverage_type: Horizontal\n",
107+
" standard_name_vocabulary: CF-1.6\n",
108+
" time_type: CCSDS UTC-A\n",
109+
" date_created: 2019-02-22T21:36:26.000000Z\n",
110+
" hdfversion: HDF5 1.10.3\n",
111+
" history: 2019-02-22T21:36:26.000000Z;7d7d1217-...\n",
112+
" identifier_file_uuid: 7d7d1217-0a51-32c7-9668-76f9cb6f5abd\n",
113+
" identifier_product_format_version: 3.0\n",
114+
" time_coverage_duration: 511.0\n",
115+
" time_coverage_end: 2018-10-17T10:26:53.000000Z\n",
116+
" time_coverage_start: 2018-10-17T10:18:22.000000Z\n",
117+
" geospatial_lat_min: 27.165281074967876\n",
118+
" geospatial_lon_min: -104.25776485626916\n",
119+
" geospatial_lat_max: 59.45914367107108\n",
120+
" geospatial_lon_max: -99.71800465595649\n",
121+
" publisher_name: NSIDC DAAC > NASA National Snow and I...\n",
122+
" publisher_email: nsidc@nsidc.org\n",
123+
" publisher_url: http://nsidc.org/daac/\n",
124+
" title: ATLAS/ICESat-2 L2A Global Geolocated ...\n",
125+
" identifier_file_product_type: ATL03\n",
126+
" institution: National Aeronautics and Space Admini...\n",
127+
" creator_name: GSFC I-SIPS > ICESat-2 Science Invest...\n",
128+
" summary: The purpose of ATL03 is to provide al...\n",
129+
" keywords: EARTH SCIENCE > CRYOSPHERE > SEA ICE ...\n",
130+
" keywords_vocabulary: NASA/GCMD Science Keywords\n",
131+
" citation: Cite these data in publications as fo...\n",
132+
" processing_level: 2A\n",
133+
" references: http://nsidc.org/data/icesat2/data.html\n",
134+
" project: ICESat-2 > Ice, Cloud, and land Eleva...\n",
135+
" instrument: ATLAS > Advanced Topographic Laser Al...\n",
136+
" platform: ICESat-2 > Ice, Cloud, and land Eleva...\n",
137+
" source: Spacecraft\n"
138+
]
139+
}
140+
],
141+
"source": [
142+
"ds = xr.open_dataset('ATL03_20181017101822_02870106_205_01.h5')\n",
143+
"print(ds)"
144+
]
145+
},
146+
{
147+
"cell_type": "markdown",
102148
"metadata": {},
103-
"outputs": [],
104149
"source": [
105-
"import xarray as xr"
150+
"### Using h5py to look at the dataset structure"
106151
]
107152
},
108153
{
109154
"cell_type": "code",
110-
"execution_count": 10,
155+
"execution_count": 3,
111156
"metadata": {},
112157
"outputs": [],
113158
"source": [
114-
"ds = xr.open_dataset('ATL03_20181017101822_02870106_205_01.h5')"
159+
"f = h5py.File('ATL03_20181017101822_02870106_205_01.h5','r')"
115160
]
116161
},
117162
{
118163
"cell_type": "code",
119-
"execution_count": 11,
164+
"execution_count": 6,
120165
"metadata": {},
121166
"outputs": [
122167
{
123-
"data": {
124-
"text/plain": [
125-
"<xarray.Dataset>\n",
126-
"Dimensions: (ds_surf_type: 5, ds_xyz: 3)\n",
127-
"Coordinates:\n",
128-
" * ds_surf_type (ds_surf_type) int32 1 2 3 4 5\n",
129-
" * ds_xyz (ds_xyz) int32 1 2 3\n",
130-
"Data variables:\n",
131-
" *empty*\n",
132-
"Attributes:\n",
133-
" granule_type: ATL03\n",
134-
" short_name: ATL03\n",
135-
" level: L2\n",
136-
" description: Photon heights determined by ATBD Alg...\n",
137-
" contributor_name: Thomas E Neumann (thomas.neumann@nasa...\n",
138-
" contributor_role: Instrument Engineer, Investigator, Pr...\n",
139-
" Conventions: CF-1.6\n",
140-
" date_type: UTC\n",
141-
" featureType: trajectory\n",
142-
" geospatial_lat_units: degrees_north\n",
143-
" geospatial_lon_units: degrees_east\n",
144-
" identifier_product_doi: 10.5067/ATLAS/ATL03.001\n",
145-
" identifier_product_doi_authority: http://dx.doi.org\n",
146-
" identifier_product_type: ATL03\n",
147-
" license: Data may not be reproduced or distrib...\n",
148-
" naming_authority: http://dx.doi.org\n",
149-
" spatial_coverage_type: Horizontal\n",
150-
" standard_name_vocabulary: CF-1.6\n",
151-
" time_type: CCSDS UTC-A\n",
152-
" date_created: 2019-02-22T21:36:26.000000Z\n",
153-
" hdfversion: HDF5 1.10.3\n",
154-
" history: 2019-02-22T21:36:26.000000Z;7d7d1217-...\n",
155-
" identifier_file_uuid: 7d7d1217-0a51-32c7-9668-76f9cb6f5abd\n",
156-
" identifier_product_format_version: 3.0\n",
157-
" time_coverage_duration: 511.0\n",
158-
" time_coverage_end: 2018-10-17T10:26:53.000000Z\n",
159-
" time_coverage_start: 2018-10-17T10:18:22.000000Z\n",
160-
" geospatial_lat_min: 27.165281074967876\n",
161-
" geospatial_lon_min: -104.25776485626916\n",
162-
" geospatial_lat_max: 59.45914367107108\n",
163-
" geospatial_lon_max: -99.71800465595649\n",
164-
" publisher_name: NSIDC DAAC > NASA National Snow and I...\n",
165-
" publisher_email: nsidc@nsidc.org\n",
166-
" publisher_url: http://nsidc.org/daac/\n",
167-
" title: ATLAS/ICESat-2 L2A Global Geolocated ...\n",
168-
" identifier_file_product_type: ATL03\n",
169-
" institution: National Aeronautics and Space Admini...\n",
170-
" creator_name: GSFC I-SIPS > ICESat-2 Science Invest...\n",
171-
" summary: The purpose of ATL03 is to provide al...\n",
172-
" keywords: EARTH SCIENCE > CRYOSPHERE > SEA ICE ...\n",
173-
" keywords_vocabulary: NASA/GCMD Science Keywords\n",
174-
" citation: Cite these data in publications as fo...\n",
175-
" processing_level: 2A\n",
176-
" references: http://nsidc.org/data/icesat2/data.html\n",
177-
" project: ICESat-2 > Ice, Cloud, and land Eleva...\n",
178-
" instrument: ATLAS > Advanced Topographic Laser Al...\n",
179-
" platform: ICESat-2 > Ice, Cloud, and land Eleva...\n",
180-
" source: Spacecraft"
181-
]
182-
},
183-
"execution_count": 11,
184-
"metadata": {},
185-
"output_type": "execute_result"
168+
"name": "stdout",
169+
"output_type": "stream",
170+
"text": [
171+
"---\n",
172+
"Group: METADATA\n",
173+
"---\n",
174+
"<HDF5 group \"/METADATA/AcquisitionInformation\" (4 members)>\n",
175+
"<HDF5 group \"/METADATA/DataQuality\" (2 members)>\n",
176+
"<HDF5 group \"/METADATA/DatasetIdentification\" (0 members)>\n",
177+
"<HDF5 group \"/METADATA/Extent\" (0 members)>\n",
178+
"<HDF5 group \"/METADATA/Lineage\" (20 members)>\n",
179+
"<HDF5 group \"/METADATA/ProcessStep\" (4 members)>\n",
180+
"<HDF5 group \"/METADATA/ProductSpecificationDocument\" (0 members)>\n",
181+
"<HDF5 group \"/METADATA/QADatasetIdentification\" (0 members)>\n",
182+
"<HDF5 group \"/METADATA/SeriesIdentification\" (0 members)>\n",
183+
"---\n",
184+
"Group: ancillary_data\n",
185+
"---\n",
186+
"<HDF5 dataset \"atl03_pad\": shape (1,), type \"<f8\">\n",
187+
"<HDF5 dataset \"atlas_sdp_gps_epoch\": shape (1,), type \"<f8\">\n",
188+
"<HDF5 dataset \"control\": shape (1,), type \"|S10000\">\n",
189+
"<HDF5 dataset \"data_end_utc\": shape (1,), type \"|S27\">\n",
190+
"<HDF5 dataset \"data_start_utc\": shape (1,), type \"|S27\">\n",
191+
"<HDF5 dataset \"end_cycle\": shape (1,), type \"<i4\">\n",
192+
"<HDF5 dataset \"end_delta_time\": shape (1,), type \"<f8\">\n",
193+
"<HDF5 dataset \"end_geoseg\": shape (1,), type \"<i4\">\n",
194+
"<HDF5 dataset \"end_gpssow\": shape (1,), type \"<f8\">\n",
195+
"<HDF5 dataset \"end_gpsweek\": shape (1,), type \"<i4\">\n",
196+
"<HDF5 dataset \"end_orbit\": shape (1,), type \"<i4\">\n",
197+
"<HDF5 dataset \"end_region\": shape (1,), type \"<i4\">\n",
198+
"<HDF5 dataset \"end_rgt\": shape (1,), type \"<i4\">\n",
199+
"<HDF5 dataset \"granule_end_utc\": shape (1,), type \"|S27\">\n",
200+
"<HDF5 dataset \"granule_start_utc\": shape (1,), type \"|S27\">\n",
201+
"<HDF5 dataset \"podppd_pad\": shape (1,), type \"<f8\">\n",
202+
"<HDF5 dataset \"release\": shape (1,), type \"|S80\">\n",
203+
"<HDF5 dataset \"start_cycle\": shape (1,), type \"<i4\">\n",
204+
"<HDF5 dataset \"start_delta_time\": shape (1,), type \"<f8\">\n",
205+
"<HDF5 dataset \"start_geoseg\": shape (1,), type \"<i4\">\n",
206+
"<HDF5 dataset \"start_gpssow\": shape (1,), type \"<f8\">\n",
207+
"<HDF5 dataset \"start_gpsweek\": shape (1,), type \"<i4\">\n",
208+
"<HDF5 dataset \"start_orbit\": shape (1,), type \"<i4\">\n",
209+
"<HDF5 dataset \"start_region\": shape (1,), type \"<i4\">\n",
210+
"<HDF5 dataset \"start_rgt\": shape (1,), type \"<i4\">\n",
211+
"<HDF5 dataset \"version\": shape (1,), type \"|S80\">\n",
212+
"<HDF5 group \"/ancillary_data/atlas_engineering\" (5 members)>\n",
213+
"<HDF5 group \"/ancillary_data/calibrations\" (5 members)>\n",
214+
"<HDF5 group \"/ancillary_data/tep\" (14 members)>\n",
215+
"<HDF5 group \"/ancillary_data/gt2r\" (1 members)>\n",
216+
"<HDF5 group \"/ancillary_data/gt2l\" (1 members)>\n",
217+
"<HDF5 group \"/ancillary_data/gt3r\" (1 members)>\n",
218+
"<HDF5 group \"/ancillary_data/gt3l\" (1 members)>\n",
219+
"<HDF5 group \"/ancillary_data/gt1r\" (1 members)>\n",
220+
"<HDF5 group \"/ancillary_data/gt1l\" (1 members)>\n",
221+
"---\n",
222+
"Group: atlas_impulse_response\n",
223+
"---\n",
224+
"<HDF5 group \"/atlas_impulse_response/pce1_spot1\" (1 members)>\n",
225+
"<HDF5 group \"/atlas_impulse_response/pce2_spot3\" (1 members)>\n",
226+
"---\n",
227+
"Group: ds_surf_type\n",
228+
"---\n"
229+
]
230+
},
231+
{
232+
"ename": "AttributeError",
233+
"evalue": "'Dataset' object has no attribute 'keys'",
234+
"output_type": "error",
235+
"traceback": [
236+
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
237+
"\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)",
238+
"\u001b[0;32m<ipython-input-6-0c6950a26914>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'Group: {}'\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mg\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 8\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'---'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 9\u001b[0;31m \u001b[0;32mfor\u001b[0m \u001b[0md\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mgroup\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mkeys\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 10\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mgroup\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0md\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
239+
"\u001b[0;31mAttributeError\u001b[0m: 'Dataset' object has no attribute 'keys'"
240+
]
186241
}
187242
],
188243
"source": [
189-
"ds"
244+
"# Walk the top level of the HDF5 file and print the members of each group.\n",
245+
"printGroups = True  # when True, print a '---' banner naming each top-level entry\n",
246+
"groups = list(f.keys())\n",
247+
"for g in groups:\n",
248+
"    group = f[g]\n",
249+
"    if printGroups:\n",
250+
"        print('---\\n{}: {}\\n---'.format(type(group).__name__, g))\n",
251+
"    # Only h5py.Group has members; a root-level Dataset (e.g. ds_surf_type) has no .keys().\n",
252+
"    for d in (group.keys() if isinstance(group, h5py.Group) else ()):\n",
253+
"        print(group[d])"
190254
]
191255
}
192256
],

0 commit comments

Comments
 (0)