Skip to content

Commit 154d195

Browse files
committed
minor cleanup
1 parent 21f7691 commit 154d195

File tree

2 files changed

+86
-47
lines changed

2 files changed

+86
-47
lines changed

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ sha1 = "0.10.6"
2929
base16ct = { version = "0.3.0", features = ["alloc"] }
3030

3131
[features]
32-
default = ["csv","webanno","transpose", "translate","textvalidation"]
32+
default = ["csv","webanno","transpose","translate","textvalidation"]
3333
csv = []
3434
webanno = []
3535
transpose = []

src/api/translate.rs

Lines changed: 85 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,10 @@
1-
2-
use crate::{api::*, ResultTextSelection};
31
use crate::datavalue::DataValue;
42
use crate::selector::{Offset, SelectorBuilder};
53
use crate::text::Text;
64
use crate::textselection::{ResultTextSelectionSet, TestTextSelection};
75
use crate::AnnotationBuilder;
86
use crate::StamError;
7+
use crate::{api::*, ResultTextSelection};
98

109
use smallvec::SmallVec;
1110

@@ -16,7 +15,6 @@ pub struct TranslateConfig {
1615
/// Allow a simple translation as output, by default this is set to `false` as we usually want to have a transposed annotation
1716
pub allow_simple: bool,
1817

19-
2018
/// Do not produce a translation annotation, only output the translated annotation (allow_simple must be set to false)
2119
/// This effectively throws away the provenance information.
2220
pub no_translation: bool,
@@ -33,8 +31,8 @@ pub struct TranslateConfig {
3331
/// Indicates that the source part of the transposition is an existing annotation. This is usually set automatically after setting `source_side_id` to an existing ID.
3432
pub existing_source_side: bool,
3533

36-
/// Do not produce a resegmentation annotation.
37-
/// This maps a translation directly and allows losing segmentation information.
34+
/// Do not produce a resegmentation annotation.
35+
/// This maps a translation directly and allows losing segmentation information.
3836
/// In doing so, it reduces complexity of the output annotations.
3937
/// If this is set, no resegmentations will be produced, but the resulting translations
4038
/// may lose some of their fine-grained information, which limits the ability to reuse them as a translation pivot
@@ -131,32 +129,29 @@ impl<'store> Translatable<'store> for ResultTextSelectionSet<'store> {
131129

132130
let mut builders: Vec<AnnotationBuilder<'static>> = Vec::with_capacity(3);
133131
// Keeps track of which side of the translation the source is found
134-
let mut source_side: Option<usize> =
135-
if let TranslationSide::ByIndex(i) = config.source_side {
136-
Some(i)
137-
} else {
138-
None
139-
};
132+
let mut source_side: Option<usize> = if let TranslationSide::ByIndex(i) = config.source_side
133+
{
134+
Some(i)
135+
} else {
136+
None
137+
};
140138
let mut refseqnrs: Vec<usize> = Vec::new(); //the sequence number of the covered text selections (in a particular side)
141-
// Found (source) or mapped (target) text selections per side, the first index corresponds to a side
139+
// Found (source) or mapped (target) text selections per side, the first index corresponds to a side
142140
let mut selectors_per_side: SmallVec<[Vec<SelectorBuilder<'static>>; 2]> = SmallVec::new();
143141

144142
let resource = self.resource();
145143
let mut simple_translation = true; //falsify, simple translations are not suitable as pivot (no-op)
146144
let mut resegment = false; //resegmentations are produced when the translated annotation covers multiple source text selections, and when users do not want to lose this segmentation (!no_resegmentation)
147145

148-
149146
if config.debug {
150147
eprintln!("[stam translate] ----------------------------");
151148
}
152149

153-
154150
let mut sourcecoverage = 0;
155151
// match the current textselectionset against all the sides in a complex translation (or ascertain
156152
// that we are dealing with a simple translation instead) the source side that matches
157153
// can never be the same as the target side that is mapped to
158154
for tsel in self.inner().iter() {
159-
160155
// iterate over all the sides
161156
for (side_i, annotation) in via.annotations_in_targets(AnnotationDepth::One).enumerate()
162157
{
@@ -166,39 +161,63 @@ impl<'store> Translatable<'store> for ResultTextSelectionSet<'store> {
166161
}
167162

168163
if config.debug {
169-
let tsel = ResultTextSelection::Unbound(self.rootstore(), resource.as_ref() ,tsel.clone());
170-
eprintln!("[stam translate] Looking for source fragment \"{}\" in side {}", tsel.text().replace("\n","\\n"), side_i);
164+
let tsel = ResultTextSelection::Unbound(
165+
self.rootstore(),
166+
resource.as_ref(),
167+
tsel.clone(),
168+
);
169+
eprintln!(
170+
"[stam translate] Looking for source fragment \"{}\" in side {}",
171+
tsel.text().replace("\n", "\\n"),
172+
side_i
173+
);
171174
}
172175

173176
// We may have multiple text selections (tsel) to translate (all must be found)
174177
let mut remainder = Some(tsel.clone());
175178

176179
for (refseqnr, reftsel) in annotation.textselections().enumerate() {
177-
if reftsel.resource() == resource && (source_side.is_none() || source_side == Some(side_i)) //source side check
180+
if reftsel.resource() == resource
181+
&& (source_side.is_none() || source_side == Some(side_i))
182+
//source side check
178183
{
179184
// get all the reference text selections that are embedded in our text selection (tsel)
180185
// we must have full coverage for a translation to be valid
181-
if tsel.test(&TextSelectionOperator::embeds(), reftsel.inner(), resource.as_ref()) {
186+
if tsel.test(
187+
&TextSelectionOperator::embeds(),
188+
reftsel.inner(),
189+
resource.as_ref(),
190+
) {
182191
refseqnrs.push(refseqnr);
183192
selectors_per_side[side_i].push(SelectorBuilder::TextSelector(
184193
resource.handle().into(),
185-
reftsel.inner().into()
194+
reftsel.inner().into(),
186195
));
187-
if let Some((_, new_remainder,_)) = remainder.unwrap().intersection(reftsel.inner()) {
196+
if let Some((_, new_remainder, _)) =
197+
remainder.unwrap().intersection(reftsel.inner())
198+
{
188199
remainder = new_remainder;
189200
if config.debug {
190-
let tmp = ResultTextSelection::Unbound(self.rootstore(), resource.as_ref() ,tsel.clone());
201+
let tmp = ResultTextSelection::Unbound(
202+
self.rootstore(),
203+
resource.as_ref(),
204+
tsel.clone(),
205+
);
191206
if let Some(remainder) = remainder {
192-
let remainder = ResultTextSelection::Unbound(self.rootstore(), resource.as_ref() ,remainder.clone());
193-
eprintln!("[stam translate] Found source fragment: \"{}\" for \"{}\" with remainder \"{}\"",
207+
let remainder = ResultTextSelection::Unbound(
208+
self.rootstore(),
209+
resource.as_ref(),
210+
remainder.clone(),
211+
);
212+
eprintln!("[stam translate] Found source fragment: \"{}\" for \"{}\" with remainder \"{}\"",
194213
&reftsel.text().replace("\n", "\\n"),
195-
&tmp.text().replace("\n", "\\n"),
214+
&tmp.text().replace("\n", "\\n"),
196215
remainder.text().replace("\n","\\n")
197216
);
198217
} else {
199-
eprintln!("[stam translate] Found source fragment: \"{}\" for \"{}\" (no remainder)",
218+
eprintln!("[stam translate] Found source fragment: \"{}\" for \"{}\" (no remainder)",
200219
&reftsel.text().replace("\n", "\\n"),
201-
&tmp.text().replace("\n", "\\n"),
220+
&tmp.text().replace("\n", "\\n"),
202221
);
203222
}
204223
}
@@ -231,15 +250,12 @@ impl<'store> Translatable<'store> for ResultTextSelectionSet<'store> {
231250
}
232251
}
233252

234-
235253
if simple_translation {
236254
//translating over a simple translation is a no-op, as it can only
237255
//produce the pivot as output
238256
// We may have multiple text selections to translate (all must be found)
239257
return Err(StamError::TranslateError(
240-
format!(
241-
"Can not translate over a simple translation, pivot has to be complex"
242-
),
258+
format!("Can not translate over a simple translation, pivot has to be complex"),
243259
"",
244260
));
245261
} else {
@@ -263,7 +279,10 @@ impl<'store> Translatable<'store> for ResultTextSelectionSet<'store> {
263279
if source_side != Some(side_i) {
264280
for refseqnr in refseqnrs.iter() {
265281
//select the text selection we seek
266-
let reftsel = annotation.textselections().nth(*refseqnr).expect("element must exist"); //MAYBE TODO: improve performance
282+
let reftsel = annotation
283+
.textselections()
284+
.nth(*refseqnr)
285+
.expect("element must exist"); //MAYBE TODO: improve performance
267286
let mapped_selector: SelectorBuilder<'static> =
268287
SelectorBuilder::TextSelector(
269288
reftsel.resource().handle().into(),
@@ -287,12 +306,13 @@ impl<'store> Translatable<'store> for ResultTextSelectionSet<'store> {
287306

288307
if (config.allow_simple || config.no_resegmentation) && resegment {
289308
//try to simplify the translation by joining adjacent selectors
290-
selectors_per_side = merge_selectors(selectors_per_side, source_side.unwrap(), config.debug);
309+
selectors_per_side =
310+
merge_selectors(selectors_per_side, source_side.unwrap(), config.debug);
291311
resegment = false;
292312
}
293313

294314
match selectors_per_side[source_side.expect("source side must exist at this point")].len() {
295-
0 =>
315+
0 =>
296316
Err(StamError::TranslateError(
297317
format!(
298318
"No source fragments were found in the complex translation {}, source side has 0 fragments, unable to translate",
@@ -499,25 +519,28 @@ impl<'store> Translatable<'store> for ResultTextSelectionSet<'store> {
499519

500520
/// Merges adjacent selectors
501521
/// Used when doing translations with lose_segmentation
502-
/// Leads to simpler output (but less powerful)
503-
fn merge_selectors(selectors_per_side: SmallVec<[Vec<SelectorBuilder<'static>>; 2]>, source_side: usize, debug: bool) ->
504-
SmallVec<[Vec<SelectorBuilder<'static>>; 2]> {
522+
/// Leads to simpler output (but less powerful)
523+
fn merge_selectors(
524+
selectors_per_side: SmallVec<[Vec<SelectorBuilder<'static>>; 2]>,
525+
source_side: usize,
526+
debug: bool,
527+
) -> SmallVec<[Vec<SelectorBuilder<'static>>; 2]> {
505528
let mut mergable = Vec::new();
506529
let mut need_merge = false;
507530

508531
// scan the source side for adjacent selectors to be merged:
509532
let mut cursor: Option<isize> = None;
510533
// index of the begin selector
511534
let mut begin_index: usize = 0;
512-
if let Some(selectors) = selectors_per_side.get(source_side) {
535+
if let Some(selectors) = selectors_per_side.get(source_side) {
513536
for (i, selector) in selectors.iter().enumerate() {
514537
if cursor.is_some() && Some(selector.offset().unwrap().begin.into()) != cursor {
515-
mergable.push((begin_index,i));
538+
mergable.push((begin_index, i));
516539
begin_index = i + 1;
517540
}
518541
cursor = Some(selector.offset().unwrap().end.into());
519542
}
520-
mergable.push((begin_index,selectors.len() - 1)); //last one
543+
mergable.push((begin_index, selectors.len() - 1)); //last one
521544
need_merge = selectors.len() > mergable.len()
522545
}
523546

@@ -528,7 +551,10 @@ SmallVec<[Vec<SelectorBuilder<'static>>; 2]> {
528551
}
529552
return selectors_per_side;
530553
} else if debug {
531-
eprintln!("[stam translate] merging selectors (indices): {:?}", mergable);
554+
eprintln!(
555+
"[stam translate] merging selectors (indices): {:?}",
556+
mergable
557+
);
532558
}
533559

534560
// merge the selectors
@@ -537,11 +563,24 @@ SmallVec<[Vec<SelectorBuilder<'static>>; 2]> {
537563
let mut new_selectors: Vec<SelectorBuilder<'static>> = Vec::new();
538564
let resource = selectors.get(0).unwrap().resource().unwrap();
539565
for (begin_index, end_index) in mergable.iter() {
540-
let begin: isize = selectors.get(*begin_index).unwrap().offset().unwrap().begin.into();
541-
let end: isize = selectors.get(*end_index).unwrap().offset().unwrap().end.into();
542-
new_selectors.push(SelectorBuilder::textselector(resource.clone(), Offset::simple(
543-
begin as usize, end as usize
544-
)));
566+
let begin: isize = selectors
567+
.get(*begin_index)
568+
.unwrap()
569+
.offset()
570+
.unwrap()
571+
.begin
572+
.into();
573+
let end: isize = selectors
574+
.get(*end_index)
575+
.unwrap()
576+
.offset()
577+
.unwrap()
578+
.end
579+
.into();
580+
new_selectors.push(SelectorBuilder::textselector(
581+
resource.clone(),
582+
Offset::simple(begin as usize, end as usize),
583+
));
545584
}
546585
merged_selectors.push(new_selectors);
547586
}

0 commit comments

Comments
 (0)