Skip to content

Commit 23ab986

Browse files
committed
feat: Add support for Canon CR3 raw files
See https://github.com/lclevy/canon_cr3 for information about the CR3 file format.
- Add testdata/canon-r6.cr3: valid CR3 file from a Canon R6 camera
- Update to detect CR3 files in file.rs based on brand name 'crx '
- Add bbox/cr3_moov.rs to handle 'moov' boxes and bbox/uuid.rs to handle Canon UUID sub-boxes that contain EXIF data for CR3 files
- Add cr3.rs to handle extracting EXIF from CR3 files
- Add basic test cases for CR3 parsing
1 parent 1095a3e commit 23ab986

File tree

11 files changed

+533
-2
lines changed

11 files changed

+533
-2
lines changed

src/bbox.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ use nom::{
77
number, AsChar, IResult, Needed,
88
};
99

10+
pub mod cr3_moov;
1011
mod idat;
1112
mod iinf;
1213
mod iloc;
@@ -15,6 +16,8 @@ mod keys;
1516
mod meta;
1617
mod mvhd;
1718
mod tkhd;
19+
mod uuid;
20+
pub use cr3_moov::Cr3MoovBox;
1821
pub use ilst::IlstBox;
1922
pub use keys::KeysBox;
2023
pub use meta::MetaBox;

src/bbox/cr3_moov.rs

Lines changed: 145 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,145 @@
1+
use std::ops::Range;
2+
3+
use nom::{combinator::fail, IResult};
4+
5+
use super::{
6+
uuid::{CanonUuidBox, CANON_UUID, UUID_SIZE},
7+
BoxHolder,
8+
};
9+
10+
/// Smallest input, in bytes, that `Cr3MoovBox::parse` will attempt to parse;
/// shorter inputs are rejected before any box is read.
// NOTE(review): presumably the size of a basic box header (4-byte size + 4-byte type) — confirm.
const MIN_CR3_INPUT_SIZE: usize = 8;
11+
12+
/// Smallest acceptable `ftyp` body length in bytes; `Cr3MoovBox::parse` fails
/// when the leading `ftyp` box has a shorter body.
// NOTE(review): presumably the 4-byte major brand field — confirm.
const MIN_FTYP_BODY_SIZE: usize = 4;
13+
14+
/// Represents the parsed moov box structure for Canon CR3 files.
15+
///
16+
/// Canon CR3 files are based on the ISO Base Media File Format (similar to MP4/MOV)
17+
/// but contain Canon-specific metadata in a UUID box within the moov container.
18+
/// This struct provides access to the Canon UUID box containing EXIF metadata.
19+
///
20+
/// # CR3 File Structure
21+
/// CR3 File
22+
/// +-- ftyp (file type box)
23+
/// +-- moov (movie box)
24+
/// | +-- uuid (Canon UUID box)
25+
/// | +-- CMT1 (main EXIF data)
26+
/// | +-- CMT2 (ExifIFD data)
27+
/// | +-- CMT3 (MakerNotes data)
28+
/// +-- mdat (media data)
29+
#[derive(Debug, Clone, PartialEq, Eq)]
30+
pub struct Cr3MoovBox {
31+
/// Canon's UUID box containing CMT metadata, if present
32+
uuid_canon_box: Option<CanonUuidBox>,
33+
}
34+
35+
impl Cr3MoovBox {
36+
pub fn parse(input: &[u8]) -> IResult<&[u8], Option<Cr3MoovBox>> {
37+
// Validate minimum input size
38+
if input.len() < MIN_CR3_INPUT_SIZE {
39+
tracing::warn!(
40+
"Input too small for CR3 parsing: {} bytes, expected at least {}",
41+
input.len(),
42+
MIN_CR3_INPUT_SIZE
43+
);
44+
return fail(input);
45+
}
46+
47+
let remain = input;
48+
let (remain, bbox) = BoxHolder::parse(remain)?;
49+
50+
// Verify this is a valid file format by checking for ftyp box
51+
if bbox.box_type() != "ftyp" {
52+
tracing::warn!("Expected ftyp box, found: {}", bbox.box_type());
53+
return fail(input);
54+
}
55+
56+
// Validate ftyp box has minimum required size
57+
if bbox.body_data().len() < MIN_FTYP_BODY_SIZE {
58+
tracing::warn!(
59+
"ftyp box too small: {} bytes, expected at least {}",
60+
bbox.body_data().len(),
61+
MIN_FTYP_BODY_SIZE
62+
);
63+
return fail(input);
64+
}
65+
66+
// Find the moov box containing the metadata
67+
let (remain, Some(moov_bbox)) = super::find_box(remain, "moov")? else {
68+
tracing::debug!("moov box not found in CR3 file");
69+
return Ok((remain, None));
70+
};
71+
72+
tracing::debug!(
73+
box_type = moov_bbox.box_type(),
74+
size = moov_bbox.header.box_size,
75+
"Found moov box in CR3 file"
76+
);
77+
78+
// Parse the moov box contents to find Canon UUID box
79+
let (_, moov_box) = Self::parse_moov_content(moov_bbox.body_data(), input)?;
80+
tracing::debug!(?moov_box, "Successfully parsed CR3 moov box");
81+
82+
Ok((remain, Some(moov_box)))
83+
}
84+
85+
fn parse_moov_content<'a>(
86+
moov_data: &'a [u8],
87+
full_input: &'a [u8],
88+
) -> IResult<&'a [u8], Cr3MoovBox> {
89+
let mut remain = moov_data;
90+
let mut uuid_canon_box = None;
91+
92+
// Iterate through all boxes within the moov box to find Canon's UUID box
93+
while !remain.is_empty() {
94+
let (new_remain, bbox) = match BoxHolder::parse(remain) {
95+
Ok(result) => result,
96+
Err(e) => {
97+
tracing::warn!(
98+
"Failed to parse box in moov content, continuing with partial data: {:?}",
99+
e
100+
);
101+
break; // Stop parsing but return what we found so far
102+
}
103+
};
104+
105+
if bbox.box_type() == "uuid" {
106+
let body_data = bbox.body_data();
107+
108+
// Validate UUID box has minimum required size
109+
if body_data.len() < UUID_SIZE {
110+
tracing::debug!("UUID box too small: {} bytes", body_data.len());
111+
remain = new_remain;
112+
continue;
113+
}
114+
115+
let uuid_bytes = &body_data[0..UUID_SIZE];
116+
117+
if uuid_bytes == CANON_UUID {
118+
tracing::debug!(
119+
"Found Canon UUID box with {} bytes of data",
120+
body_data.len()
121+
);
122+
let (_, canon_box) = CanonUuidBox::parse(body_data, full_input)?;
123+
uuid_canon_box = Some(canon_box);
124+
break;
125+
} else {
126+
tracing::debug!("Found non-Canon UUID box");
127+
}
128+
}
129+
130+
remain = new_remain;
131+
}
132+
133+
Ok((remain, Cr3MoovBox { uuid_canon_box }))
134+
}
135+
136+
#[allow(dead_code)] // API method for tests
137+
pub fn uuid_canon_box(&self) -> Option<&CanonUuidBox> {
138+
self.uuid_canon_box.as_ref()
139+
}
140+
141+
pub fn exif_data_offset(&self) -> Option<Range<usize>> {
142+
// For CR3, we primarily use CMT1 which contains the main EXIF IFD0 data
143+
self.uuid_canon_box.as_ref()?.exif_data_offset().cloned()
144+
}
145+
}

src/bbox/uuid.rs

Lines changed: 212 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,212 @@
1+
use std::ops::Range;
2+
3+
use nom::IResult;
4+
5+
use super::BoxHolder;
6+
use crate::exif::TiffHeader;
7+
8+
/// Size of a UUID in bytes
9+
/// This is also the number of leading bytes of a `uuid` box body that are
/// compared against `CANON_UUID` and skipped before the CMT sub-boxes are read.
pub const UUID_SIZE: usize = 16;
10+
11+
/// Canon CMT box types
12+
/// Only boxes whose type appears in this list are recorded by
/// `CanonUuidBox::parse`; every other box inside the Canon UUID box is skipped.
const CMT_BOX_TYPES: &[&str] = &["CMT1", "CMT2", "CMT3"];
13+
14+
/// Canon's UUID for CR3 files: 85c0b687-820f-11e0-8111-f4ce462b6a48
15+
pub const CANON_UUID: [u8; 16] = [
16+
0x85, 0xc0, 0xb6, 0x87, 0x82, 0x0f, 0x11, 0xe0, 0x81, 0x11, 0xf4, 0xce, 0x46, 0x2b, 0x6a, 0x48,
17+
];
18+
19+
/// Canon's proprietary `uuid` box, which carries the CMT (Canon Metadata) boxes
/// of a CR3 file.
///
/// CR3 files keep their EXIF metadata in this box, split over three CMT
/// sub-boxes:
/// - CMT1: main EXIF IFD0 data (camera settings, basic metadata)
/// - CMT2: ExifIFD data (detailed EXIF information)
/// - CMT3: MakerNotes data (Canon-specific metadata)
///
/// Each field stores the byte range of the corresponding sub-box body (box
/// header excluded), expressed relative to the start of the full input buffer
/// that was handed to the parser. A field is `None` when that sub-box was
/// missing or was rejected during parsing.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CanonUuidBox {
    /// Location of the CMT1 body: the main EXIF IFD0 data (primary metadata).
    cmt1_offset: Option<Range<usize>>,
    /// Location of the CMT2 body: the ExifIFD data (detailed EXIF information).
    cmt2_offset: Option<Range<usize>>,
    /// Location of the CMT3 body: the MakerNotes data (Canon-specific metadata).
    cmt3_offset: Option<Range<usize>>,
}
35+
36+
impl CanonUuidBox {
37+
/// Returns the offset range for the primary EXIF data (CMT1).
38+
pub fn exif_data_offset(&self) -> Option<&Range<usize>> {
39+
// For CR3, we primarily use CMT1 which contains the main EXIF IFD0 data
40+
self.cmt1_offset.as_ref()
41+
}
42+
43+
/// Returns the offset range for the ExifIFD data (CMT2).
44+
#[allow(dead_code)] // API method for future use
45+
pub fn cmt2_data_offset(&self) -> Option<&Range<usize>> {
46+
self.cmt2_offset.as_ref()
47+
}
48+
49+
/// Returns the offset range for the MakerNotes data (CMT3).
50+
#[allow(dead_code)] // API method for future use
51+
pub fn cmt3_data_offset(&self) -> Option<&Range<usize>> {
52+
self.cmt3_offset.as_ref()
53+
}
54+
55+
/// Parses Canon's UUID box to extract CMT (Canon Metadata) box offsets.
56+
pub fn parse<'a>(uuid_data: &'a [u8], full_input: &'a [u8]) -> IResult<&'a [u8], CanonUuidBox> {
57+
// Validate input sizes
58+
if uuid_data.len() < UUID_SIZE {
59+
tracing::error!(
60+
"Canon UUID box data too small: {} bytes, expected at least {}",
61+
uuid_data.len(),
62+
UUID_SIZE
63+
);
64+
return nom::combinator::fail(uuid_data);
65+
}
66+
67+
if full_input.is_empty() {
68+
tracing::error!("Full input is empty for Canon UUID box parsing");
69+
return nom::combinator::fail(uuid_data);
70+
}
71+
72+
// Skip the UUID header
73+
let mut remain = &uuid_data[UUID_SIZE..];
74+
let mut cmt1_offset = None;
75+
let mut cmt2_offset = None;
76+
let mut cmt3_offset = None;
77+
78+
tracing::debug!(
79+
"Parsing Canon UUID box with {} bytes of CMT data",
80+
remain.len()
81+
);
82+
83+
// Parse CMT boxes within the Canon UUID box
84+
while !remain.is_empty() {
85+
let (new_remain, bbox) = match BoxHolder::parse(remain) {
86+
Ok(result) => result,
87+
Err(e) => {
88+
tracing::warn!(
89+
"Failed to parse CMT box, continuing with partial data: {:?}",
90+
e
91+
);
92+
break; // Stop parsing but return what we found so far
93+
}
94+
};
95+
96+
let box_type = bbox.box_type();
97+
if CMT_BOX_TYPES.contains(&box_type) {
98+
// Calculate offset safely using slice bounds checking
99+
let data_start = bbox.data.as_ptr() as usize;
100+
let input_start = full_input.as_ptr() as usize;
101+
102+
// Ensure the data pointer is within the input bounds
103+
if data_start < input_start || data_start >= input_start + full_input.len() {
104+
tracing::warn!("CMT box data pointer outside input bounds");
105+
remain = new_remain;
106+
continue;
107+
}
108+
109+
let start_offset = data_start - input_start;
110+
let body_start = start_offset + bbox.header_size();
111+
let body_end = start_offset + bbox.data.len();
112+
113+
// Validate offset ranges are within bounds
114+
if body_end > full_input.len() {
115+
tracing::warn!(
116+
"CMT box body extends beyond input bounds: {}..{} > {}",
117+
body_start,
118+
body_end,
119+
full_input.len()
120+
);
121+
remain = new_remain;
122+
continue;
123+
}
124+
125+
let offset_range = body_start..body_end;
126+
127+
// Validate CMT box data has minimum size and reasonable content
128+
let cmt_data = &full_input[offset_range.clone()];
129+
if !Self::validate_cmt_data(box_type, cmt_data) {
130+
tracing::warn!("CMT box {} failed validation, skipping", box_type);
131+
remain = new_remain;
132+
continue;
133+
}
134+
135+
match box_type {
136+
"CMT1" => {
137+
cmt1_offset = Some(offset_range);
138+
tracing::debug!("Found CMT1 (IFD0) at offset {}..{}", body_start, body_end);
139+
}
140+
"CMT2" => {
141+
cmt2_offset = Some(offset_range);
142+
tracing::debug!(
143+
"Found CMT2 (ExifIFD) at offset {}..{}",
144+
body_start,
145+
body_end
146+
);
147+
}
148+
"CMT3" => {
149+
cmt3_offset = Some(offset_range);
150+
tracing::debug!(
151+
"Found CMT3 (MakerNotes) at offset {}..{}",
152+
body_start,
153+
body_end
154+
);
155+
}
156+
_ => unreachable!("box_type should be one of CMT1, CMT2, or CMT3"),
157+
}
158+
} else {
159+
// Skip unknown boxes within Canon UUID
160+
tracing::debug!("Skipping unknown box type: {}", box_type);
161+
}
162+
163+
remain = new_remain;
164+
}
165+
166+
Ok((
167+
remain,
168+
CanonUuidBox {
169+
cmt1_offset,
170+
cmt2_offset,
171+
cmt3_offset,
172+
},
173+
))
174+
}
175+
176+
/// Validates CMT box data for basic integrity.
177+
fn validate_cmt_data(box_type: &str, data: &[u8]) -> bool {
178+
// Minimum size check - CMT boxes should have at least 8 bytes
179+
if data.len() < 8 {
180+
tracing::warn!("CMT box {} too small: {} bytes", box_type, data.len());
181+
return false;
182+
}
183+
184+
match box_type {
185+
"CMT1" => {
186+
// CMT1 should start with TIFF header - validate using TiffHeader::parse
187+
if TiffHeader::parse(data).is_ok() {
188+
tracing::debug!("CMT1 has valid TIFF header");
189+
true
190+
} else {
191+
tracing::warn!("CMT1 does not have valid TIFF header");
192+
false
193+
}
194+
}
195+
"CMT2" | "CMT3" => {
196+
// CMT2 and CMT3 should also be TIFF format, but we're more lenient
197+
// since they might have different internal structures
198+
if data.len() >= 8 {
199+
tracing::debug!("CMT box {} has sufficient size", box_type);
200+
true
201+
} else {
202+
tracing::warn!("CMT box {} too small for valid data", box_type);
203+
false
204+
}
205+
}
206+
_ => {
207+
tracing::warn!("Unknown CMT box type: {}", box_type);
208+
false
209+
}
210+
}
211+
}
212+
}

0 commit comments

Comments
 (0)