1-
2- use crate :: { api:: * , ResultTextSelection } ;
31use crate :: datavalue:: DataValue ;
42use crate :: selector:: { Offset , SelectorBuilder } ;
53use crate :: text:: Text ;
64use crate :: textselection:: { ResultTextSelectionSet , TestTextSelection } ;
75use crate :: AnnotationBuilder ;
86use crate :: StamError ;
7+ use crate :: { api:: * , ResultTextSelection } ;
98
109use smallvec:: SmallVec ;
1110
@@ -16,7 +15,6 @@ pub struct TranslateConfig {
1615 /// Allow a simple translation as output, by default this is set to `false` as we usually want to have a transposed annotation
1716 pub allow_simple : bool ,
1817
19-
2018 /// Do not produce a translation annotation, only output the translated annotation (allow_simple must be set to false)
2119 /// This effectively throws away the provenance information.
2220 pub no_translation : bool ,
@@ -33,8 +31,8 @@ pub struct TranslateConfig {
3331 /// Indicates that the source part of the transposition is an existing annotation. This is usually set automatically after setting `source_side_id` to an existing ID.
3432 pub existing_source_side : bool ,
3533
36- /// Do not produce a resegmentation annotation.
37- /// This maps a translation directly and allows losing segmentation information.
34+ /// Do not produce a resegmentation annotation.
35+ /// This maps a translation directly and allows losing segmentation information.
3836 /// In doing so, it reduces complexity of the output annotations.
3937 /// If this is set, no resegmentations will be produced, but the resulting translations
4038 /// may lose some of its fine-grained information, which limits the ability to reuse them as a translation pivot
@@ -131,32 +129,29 @@ impl<'store> Translatable<'store> for ResultTextSelectionSet<'store> {
131129
132130 let mut builders: Vec < AnnotationBuilder < ' static > > = Vec :: with_capacity ( 3 ) ;
133131 // Keeps track of which side of the translation the source is found
134- let mut source_side: Option < usize > =
135- if let TranslationSide :: ByIndex ( i ) = config . source_side {
136- Some ( i)
137- } else {
138- None
139- } ;
132+ let mut source_side: Option < usize > = if let TranslationSide :: ByIndex ( i ) = config . source_side
133+ {
134+ Some ( i)
135+ } else {
136+ None
137+ } ;
140138 let mut refseqnrs: Vec < usize > = Vec :: new ( ) ; //the sequence numbers of the covered text selections (in a particular side)
141- // Found (source) or mapped (target) text selections per side, the first index corresponds to a side
139+ // Found (source) or mapped (target) text selections per side, the first index corresponds to a side
142140 let mut selectors_per_side: SmallVec < [ Vec < SelectorBuilder < ' static > > ; 2 ] > = SmallVec :: new ( ) ;
143141
144142 let resource = self . resource ( ) ;
145143 let mut simple_translation = true ; //falsify, simple translation are not suitable as pivot (no-op)
146144 let mut resegment = false ; //resegmentations are produced when the translated annotation covers multiple source text selections, and when users do not want to lose this segmentation (!no_resegmentation)
147145
148-
149146 if config. debug {
150147 eprintln ! ( "[stam translate] ----------------------------" ) ;
151148 }
152149
153-
154150 let mut sourcecoverage = 0 ;
155151 // match the current textselectionset against all the sides in a complex translation (or ascertain
156152 // that we are dealing with a simple translation instead) the source side that matches
157153 // can never be the same as the target side that is mapped to
158154 for tsel in self . inner ( ) . iter ( ) {
159-
160155 // iterate over all the sides
161156 for ( side_i, annotation) in via. annotations_in_targets ( AnnotationDepth :: One ) . enumerate ( )
162157 {
@@ -166,39 +161,63 @@ impl<'store> Translatable<'store> for ResultTextSelectionSet<'store> {
166161 }
167162
168163 if config. debug {
169- let tsel = ResultTextSelection :: Unbound ( self . rootstore ( ) , resource. as_ref ( ) , tsel. clone ( ) ) ;
170- eprintln ! ( "[stam translate] Looking for source fragment \" {}\" in side {}" , tsel. text( ) . replace( "\n " , "\\ n" ) , side_i) ;
164+ let tsel = ResultTextSelection :: Unbound (
165+ self . rootstore ( ) ,
166+ resource. as_ref ( ) ,
167+ tsel. clone ( ) ,
168+ ) ;
169+ eprintln ! (
170+ "[stam translate] Looking for source fragment \" {}\" in side {}" ,
171+ tsel. text( ) . replace( "\n " , "\\ n" ) ,
172+ side_i
173+ ) ;
171174 }
172175
173176 // We may have multiple text selections (tsel) to translate (all must be found)
174177 let mut remainder = Some ( tsel. clone ( ) ) ;
175178
176179 for ( refseqnr, reftsel) in annotation. textselections ( ) . enumerate ( ) {
177- if reftsel. resource ( ) == resource && ( source_side. is_none ( ) || source_side == Some ( side_i) ) //source side check
180+ if reftsel. resource ( ) == resource
181+ && ( source_side. is_none ( ) || source_side == Some ( side_i) )
182+ //source side check
178183 {
179184 // get all the reference text selections that are embedded in our text selection (tsel)
180185 // we must have full coverage for a translation to be valid
181- if tsel. test ( & TextSelectionOperator :: embeds ( ) , reftsel. inner ( ) , resource. as_ref ( ) ) {
186+ if tsel. test (
187+ & TextSelectionOperator :: embeds ( ) ,
188+ reftsel. inner ( ) ,
189+ resource. as_ref ( ) ,
190+ ) {
182191 refseqnrs. push ( refseqnr) ;
183192 selectors_per_side[ side_i] . push ( SelectorBuilder :: TextSelector (
184193 resource. handle ( ) . into ( ) ,
185- reftsel. inner ( ) . into ( )
194+ reftsel. inner ( ) . into ( ) ,
186195 ) ) ;
187- if let Some ( ( _, new_remainder, _) ) = remainder. unwrap ( ) . intersection ( reftsel. inner ( ) ) {
196+ if let Some ( ( _, new_remainder, _) ) =
197+ remainder. unwrap ( ) . intersection ( reftsel. inner ( ) )
198+ {
188199 remainder = new_remainder;
189200 if config. debug {
190- let tmp = ResultTextSelection :: Unbound ( self . rootstore ( ) , resource. as_ref ( ) , tsel. clone ( ) ) ;
201+ let tmp = ResultTextSelection :: Unbound (
202+ self . rootstore ( ) ,
203+ resource. as_ref ( ) ,
204+ tsel. clone ( ) ,
205+ ) ;
191206 if let Some ( remainder) = remainder {
192- let remainder = ResultTextSelection :: Unbound ( self . rootstore ( ) , resource. as_ref ( ) , remainder. clone ( ) ) ;
193- eprintln ! ( "[stam translate] Found source fragment: \" {}\" for \" {}\" with remainder \" {}\" " ,
207+ let remainder = ResultTextSelection :: Unbound (
208+ self . rootstore ( ) ,
209+ resource. as_ref ( ) ,
210+ remainder. clone ( ) ,
211+ ) ;
212+ eprintln ! ( "[stam translate] Found source fragment: \" {}\" for \" {}\" with remainder \" {}\" " ,
194213 & reftsel. text( ) . replace( "\n " , "\\ n" ) ,
195- & tmp. text( ) . replace( "\n " , "\\ n" ) ,
214+ & tmp. text( ) . replace( "\n " , "\\ n" ) ,
196215 remainder. text( ) . replace( "\n " , "\\ n" )
197216 ) ;
198217 } else {
199- eprintln ! ( "[stam translate] Found source fragment: \" {}\" for \" {}\" (no remainder)" ,
218+ eprintln ! ( "[stam translate] Found source fragment: \" {}\" for \" {}\" (no remainder)" ,
200219 & reftsel. text( ) . replace( "\n " , "\\ n" ) ,
201- & tmp. text( ) . replace( "\n " , "\\ n" ) ,
220+ & tmp. text( ) . replace( "\n " , "\\ n" ) ,
202221 ) ;
203222 }
204223 }
@@ -231,15 +250,12 @@ impl<'store> Translatable<'store> for ResultTextSelectionSet<'store> {
231250 }
232251 }
233252
234-
235253 if simple_translation {
236254 //translating over a simple translation is a no-op, as it can only
237255 //produce the pivot as output
238256 // We may have multiple text selections to translate (all must be found)
239257 return Err ( StamError :: TranslateError (
240- format ! (
241- "Can not translate over a simple translation, pivot has to be complex"
242- ) ,
258+ format ! ( "Can not translate over a simple translation, pivot has to be complex" ) ,
243259 "" ,
244260 ) ) ;
245261 } else {
@@ -263,7 +279,10 @@ impl<'store> Translatable<'store> for ResultTextSelectionSet<'store> {
263279 if source_side != Some ( side_i) {
264280 for refseqnr in refseqnrs. iter ( ) {
265281 //select the text selection we seek
266- let reftsel = annotation. textselections ( ) . nth ( * refseqnr) . expect ( "element must exist" ) ; //MAYBE TODO: improve performance
282+ let reftsel = annotation
283+ . textselections ( )
284+ . nth ( * refseqnr)
285+ . expect ( "element must exist" ) ; //MAYBE TODO: improve performance
267286 let mapped_selector: SelectorBuilder < ' static > =
268287 SelectorBuilder :: TextSelector (
269288 reftsel. resource ( ) . handle ( ) . into ( ) ,
@@ -287,12 +306,13 @@ impl<'store> Translatable<'store> for ResultTextSelectionSet<'store> {
287306
288307 if ( config. allow_simple || config. no_resegmentation ) && resegment {
289308 //try to simplify the translation by joining adjacent selectors
290- selectors_per_side = merge_selectors ( selectors_per_side, source_side. unwrap ( ) , config. debug ) ;
309+ selectors_per_side =
310+ merge_selectors ( selectors_per_side, source_side. unwrap ( ) , config. debug ) ;
291311 resegment = false ;
292312 }
293313
294314 match selectors_per_side[ source_side. expect ( "source side must exist at this point" ) ] . len ( ) {
295- 0 =>
315+ 0 =>
296316 Err ( StamError :: TranslateError (
297317 format ! (
298318 "No source fragments were found in the complex translation {}, source side has 0 fragments, unable to translate" ,
@@ -499,25 +519,28 @@ impl<'store> Translatable<'store> for ResultTextSelectionSet<'store> {
499519
500520/// Merges adjacent selectors
501521/// Used when doing translations with lose_segmentation
502- /// Leads to simpler output (but less powerful)
503- fn merge_selectors ( selectors_per_side : SmallVec < [ Vec < SelectorBuilder < ' static > > ; 2 ] > , source_side : usize , debug : bool ) ->
504- SmallVec < [ Vec < SelectorBuilder < ' static > > ; 2 ] > {
522+ /// Leads to simpler output (but less powerful)
523+ fn merge_selectors (
524+ selectors_per_side : SmallVec < [ Vec < SelectorBuilder < ' static > > ; 2 ] > ,
525+ source_side : usize ,
526+ debug : bool ,
527+ ) -> SmallVec < [ Vec < SelectorBuilder < ' static > > ; 2 ] > {
505528 let mut mergable = Vec :: new ( ) ;
506529 let mut need_merge = false ;
507530
508531 // scan the source side for adjacent selectors to be merged:
509532 let mut cursor: Option < isize > = None ;
510533 // index of the begin selector
511534 let mut begin_index: usize = 0 ;
512- if let Some ( selectors) = selectors_per_side. get ( source_side) {
535+ if let Some ( selectors) = selectors_per_side. get ( source_side) {
513536 for ( i, selector) in selectors. iter ( ) . enumerate ( ) {
514537 if cursor. is_some ( ) && Some ( selector. offset ( ) . unwrap ( ) . begin . into ( ) ) != cursor {
515- mergable. push ( ( begin_index, i) ) ;
538+ mergable. push ( ( begin_index, i) ) ;
516539 begin_index = i + 1 ;
517540 }
518541 cursor = Some ( selector. offset ( ) . unwrap ( ) . end . into ( ) ) ;
519542 }
520- mergable. push ( ( begin_index, selectors. len ( ) - 1 ) ) ; //last one
543+ mergable. push ( ( begin_index, selectors. len ( ) - 1 ) ) ; //last one
521544 need_merge = selectors. len ( ) > mergable. len ( )
522545 }
523546
@@ -528,7 +551,10 @@ SmallVec<[Vec<SelectorBuilder<'static>>; 2]> {
528551 }
529552 return selectors_per_side;
530553 } else if debug {
531- eprintln ! ( "[stam translate] merging selectors (indices): {:?}" , mergable) ;
554+ eprintln ! (
555+ "[stam translate] merging selectors (indices): {:?}" ,
556+ mergable
557+ ) ;
532558 }
533559
534560 // merge the selectors
@@ -537,11 +563,24 @@ SmallVec<[Vec<SelectorBuilder<'static>>; 2]> {
537563 let mut new_selectors: Vec < SelectorBuilder < ' static > > = Vec :: new ( ) ;
538564 let resource = selectors. get ( 0 ) . unwrap ( ) . resource ( ) . unwrap ( ) ;
539565 for ( begin_index, end_index) in mergable. iter ( ) {
540- let begin: isize = selectors. get ( * begin_index) . unwrap ( ) . offset ( ) . unwrap ( ) . begin . into ( ) ;
541- let end: isize = selectors. get ( * end_index) . unwrap ( ) . offset ( ) . unwrap ( ) . end . into ( ) ;
542- new_selectors. push ( SelectorBuilder :: textselector ( resource. clone ( ) , Offset :: simple (
543- begin as usize , end as usize
544- ) ) ) ;
566+ let begin: isize = selectors
567+ . get ( * begin_index)
568+ . unwrap ( )
569+ . offset ( )
570+ . unwrap ( )
571+ . begin
572+ . into ( ) ;
573+ let end: isize = selectors
574+ . get ( * end_index)
575+ . unwrap ( )
576+ . offset ( )
577+ . unwrap ( )
578+ . end
579+ . into ( ) ;
580+ new_selectors. push ( SelectorBuilder :: textselector (
581+ resource. clone ( ) ,
582+ Offset :: simple ( begin as usize , end as usize ) ,
583+ ) ) ;
545584 }
546585 merged_selectors. push ( new_selectors) ;
547586 }
0 commit comments