marctk/
record.rs

1//! Base MARC record model and associated components.
2
3use crate::query::ComplexSpecification;
/// Number of bytes in a MARC tag.
const TAG_SIZE: usize = 3;
/// Number of bytes in a MARC leader.
const LEADER_SIZE: usize = 24;
/// Number of bytes in a subfield code or an indicator.
const CODE_SIZE: usize = 1;
/// Leader applied to new records: `LEADER_SIZE` spaces.
const DEFAULT_LEADER: &'static str = "                        ";
/// Indicator value reported when none has been set: a single space.
const DEFAULT_INDICATOR: &'static str = " ";
9
/// Confirms that `s` occupies exactly `len` bytes (not characters).
///
/// Returns `Ok(())` on a match, otherwise an `Err` describing the
/// string, the wanted byte count, and the byte count found.
fn check_byte_count(s: &str, len: usize) -> Result<(), String> {
    match s.len() {
        byte_len if byte_len == len => Ok(()),
        byte_len => Err(format!(
            "Invalid byte count for string s={s} wanted={len} found={byte_len}"
        )),
    }
}
20
/// A MARC control field: a tag below "010" paired with unstructured content.
#[derive(Clone, Debug, PartialEq)]
pub struct Controlfield {
    /// Three-byte tag, validated by `Controlfield::new`.
    tag: String,
    /// Raw field content.
    content: String,
}
27
28impl Controlfield {
29    /// Create a Controlfield with the provided tag and content.
30    ///
31    /// * `tag` - Must have the correct byte count.
32    ///
33    /// # Examples
34    ///
35    /// ```
36    /// let control_field = marctk::Controlfield::new("008", "12345").unwrap();
37    /// assert_eq!(control_field.tag(), "008");
38    /// ```
39    /// ```
40    /// let control_field = marctk::Controlfield::new("010", "12345");
41    ///
42    /// assert_eq!(control_field.is_err(), true);
43    /// assert_eq!(control_field.unwrap_err(), "Invalid Controlfield tag: 010");
44    /// ```
45    pub fn new(tag: impl Into<String>, content: impl Into<String>) -> Result<Self, String> {
46        let tag = tag.into();
47        check_byte_count(&tag, TAG_SIZE)?;
48
49        if tag.as_str() < "000" || tag.as_str() > "009" {
50            return Err(format!("Invalid Controlfield tag: {tag}"));
51        }
52
53        Ok(Controlfield {
54            tag,
55            content: content.into(),
56        })
57    }
58
59    /// Get the tag
60    ///
61    /// # Examples
62    ///
63    /// ```
64    /// use marctk::Controlfield;
65    ///
66    /// let control_field = Controlfield::new("008", "12345").unwrap();
67    /// assert_eq!(control_field.tag(), "008");
68    /// ```
69    pub fn tag(&self) -> &str {
70        &self.tag
71    }
72
73    /// Get the content
74    ///
75    /// # Examples
76    ///
77    /// ```
78    /// use marctk::Controlfield;
79    ///
80    /// let control_field = Controlfield::new("008", "12345").unwrap();
81    /// assert_eq!(control_field.content(), "12345");
82    /// ```
83    pub fn content(&self) -> &str {
84        &self.content
85    }
86
87    /// Set the Controlfield content.
88    ///
89    /// # Examples
90    ///
91    /// ```
92    /// use marctk::Controlfield;
93    ///
94    /// let mut control_field = Controlfield::new("008", "12345").unwrap();
95    /// control_field.set_content("6789");
96    /// assert_eq!(control_field.content(), "6789");
97    /// ```
98    pub fn set_content(&mut self, content: impl Into<String>) {
99        self.content = content.into();
100    }
101}
102
/// One subfield of a data field: a code and the value stored under it.
#[derive(Clone, Debug, PartialEq)]
pub struct Subfield {
    /// Single-byte subfield code.
    code: String,
    /// Subfield value.
    content: String,
}
109
110impl Subfield {
111    /// Create a Subfield with the provided code and content.
112    ///
113    /// * `code` - Must have the correct byte count.
114    ///
115    /// # Examples
116    ///
117    /// ```
118    /// use marctk::Subfield;
119    /// let subfield: Subfield = match Subfield::new("a", "Στη σκιά της πεταλούδας") {
120    ///   Ok(sf) => sf,
121    ///   Err(e) => panic!("Subfield::new() failed with: {}", e),
122    /// };
123    /// assert_eq!(subfield.content(), "Στη σκιά της πεταλούδας");
124    /// ```
125    ///
126    /// ```should_panic
127    /// use marctk::Subfield;
128    /// Subfield::new("🦋", "Στη σκιά της πεταλούδας").unwrap();
129    /// ```
130    ///
131    pub fn new(code: impl Into<String>, content: impl Into<String>) -> Result<Self, String> {
132        let code = code.into();
133        check_byte_count(&code, CODE_SIZE)?;
134        Ok(Subfield {
135            code,
136            content: content.into(),
137        })
138    }
139    /// Get the Subfield content.
140    pub fn content(&self) -> &str {
141        &self.content
142    }
143    /// Set the Subfield content.
144    ///
145    /// # Examples
146    ///
147    /// ```
148    /// use marctk::Subfield;
149    /// let mut subfield: Subfield = Subfield::new("a", "potato").unwrap();
150    /// subfield.set_content("cheese");
151    /// assert_eq!(subfield.content(), "cheese");
152    /// ```
153    ///
154    pub fn set_content(&mut self, content: impl Into<String>) {
155        self.content = content.into();
156    }
157    /// Get the Subfield code.
158    ///
159    /// # Examples
160    ///
161    /// ```
162    /// use marctk::Subfield;
163    /// let subfield: Subfield = Subfield::new("a", "potato").unwrap();
164    /// assert_eq!(subfield.code(), "a");
165    /// ```
166    ///
167    pub fn code(&self) -> &str {
168        &self.code
169    }
170    /// Set the Subfield code.
171    ///
172    /// # Examples
173    ///
174    /// ```
175    /// use marctk::Subfield;
176    /// let mut subfield: Subfield = Subfield::new("a", "potato").unwrap();
177    /// subfield.set_code("q");
178    /// assert_eq!(subfield.code(), "q");
179    /// ```
180    ///
181    /// ```should_panic
182    /// use marctk::Subfield;
183    /// let mut subfield: Subfield = Subfield::new("a", "potato").unwrap();
184    /// subfield.set_code("🥔").unwrap();
185    /// ```
186    ///
187    pub fn set_code(&mut self, code: impl Into<String>) -> Result<(), String> {
188        let code: String = code.into();
189        check_byte_count(&code, CODE_SIZE)?;
190        self.code = code;
191        Ok(())
192    }
193}
194
195/// A MARC Data Field with tag, indicators, and subfields.
196#[derive(Debug, Clone, PartialEq)]
197pub struct Field {
198    tag: String,
199    ind1: Option<String>,
200    ind2: Option<String>,
201    subfields: Vec<Subfield>,
202}
203
204impl Field {
205    /// Create a Field with the provided tag.
206    ///
207    /// * `tag` - Must have the correct byte count.
208    ///
209    /// # Examples
210    ///
211    /// ```
212    /// use marctk::Field;
213    ///
214    /// let field: Field = match Field::new("245") {
215    ///   Ok(f) => f,
216    ///   Err(e) => panic!("Field::new() failed with: {}", e),
217    /// };
218    /// assert_eq!(field.tag(), "245");
219    /// assert_eq!(field.ind1(), " ");
220    /// assert_eq!(field.ind2(), " ");
221    /// assert_eq!(field.subfields().len(), 0);
222    /// ```
223    ///
224    pub fn new(tag: impl Into<String>) -> Result<Self, String> {
225        let tag = tag.into();
226        check_byte_count(&tag, TAG_SIZE)?;
227
228        if tag.as_str() < "010" || tag.as_str() > "999" {
229            // Of note, OCLC sometimes creates MARC records with data
230            // fields using the tag "DAT".  For our purposes, the only
231            // thing that really matters is the byte count (checked
232            // above), so just warn for unexpected tags.
233            eprintln!("Unexpected tag for data field: '{tag}'");
234        }
235
236        Ok(Field {
237            tag,
238            ind1: None,
239            ind2: None,
240            subfields: Vec::new(),
241        })
242    }
243    /// Get the tag
244    pub fn tag(&self) -> &str {
245        &self.tag
246    }
247    /// Get the value of indicator-1, defaulting to DEFAULT_INDICATOR.
248    pub fn ind1(&self) -> &str {
249        self.ind1.as_deref().unwrap_or(DEFAULT_INDICATOR)
250    }
251    /// Get the value of indicator-2, defaulting to DEFAULT_INDICATOR.
252    pub fn ind2(&self) -> &str {
253        self.ind2.as_deref().unwrap_or(DEFAULT_INDICATOR)
254    }
255    /// Get the full list of subfields
256    pub fn subfields(&self) -> &Vec<Subfield> {
257        &self.subfields
258    }
259    /// Get a mutable list of subfields.
260    pub fn subfields_mut(&mut self) -> &mut Vec<Subfield> {
261        &mut self.subfields
262    }
263
264    /// Set the indicator-1 value.
265    ///
266    /// * `ind` - Must have the correct byte count.
267    pub fn set_ind1(&mut self, ind: impl Into<String>) -> Result<(), String> {
268        let ind = ind.into();
269        check_byte_count(&ind, CODE_SIZE)?;
270        self.ind1 = Some(ind);
271        Ok(())
272    }
273
274    /// Set the indicator-2 value.
275    ///
276    /// * `ind` - Must have the correct byte count.
277    pub fn set_ind2(&mut self, ind: impl Into<String>) -> Result<(), String> {
278        let ind = ind.into();
279        check_byte_count(&ind, CODE_SIZE)?;
280        self.ind2 = Some(ind);
281        Ok(())
282    }
283
284    /// Get a list of subfields with the provided code.
285    pub fn get_subfields(&self, code: &str) -> Vec<&Subfield> {
286        self.subfields.iter().filter(|f| f.code() == code).collect()
287    }
288
289    /// Get the first occurrence of the subfield with the provided code,
290    /// if one is present.
291    ///
292    /// # Examples
293    ///
294    /// ```
295    /// use marctk::Field;
296    ///
297    /// let mut field: Field =  Field::new("245").unwrap();
298    /// assert!(field.first_subfield("a").is_none());
299    ///
300    /// field.add_subfield("a", "First one").unwrap();
301    /// field.add_subfield("a", "Second one").unwrap();
302    ///
303    /// assert_eq!(field.first_subfield("a").unwrap().content(), "First one");
304    /// ```
305    pub fn first_subfield(&self, code: &str) -> Option<&Subfield> {
306        self.subfields.iter().find(|f| f.code() == code)
307    }
308
309    /// Mutable variant of ['first_subfield()`].
310    ///
311    /// # Examples
312    ///
313    /// ```
314    /// use marctk::Field;
315    ///
316    /// let mut field: Field =  Field::new("245").unwrap();
317    /// assert!(field.first_subfield("a").is_none());
318    ///
319    /// field.add_subfield("a", "First one").unwrap();
320    ///
321    /// field.first_subfield_mut("a").unwrap().set_content("Other text");
322    ///
323    /// assert_eq!(field.first_subfield("a").unwrap().content(), "Other text");
324    /// ```
325    pub fn first_subfield_mut(&mut self, code: &str) -> Option<&mut Subfield> {
326        self.subfields.iter_mut().find(|f| f.code() == code)
327    }
328
329    /// True if a subfield with the provided code is present.
330    ///
331    /// # Examples
332    ///
333    /// ```
334    /// use marctk::Field;
335    ///
336    /// let mut field: Field =  Field::new("245").unwrap();
337    /// assert!(!field.has_subfield("a"));
338    ///
339    /// field.add_subfield("a", "My title").unwrap();
340    ///
341    /// assert!(field.has_subfield("a"));
342    /// ```
343    pub fn has_subfield(&self, code: &str) -> bool {
344        self.subfields.iter().any(|f| f.code() == code)
345    }
346
347    /// Get a mutable list of subfields with the provided code.
348    ///
349    /// # Examples
350    ///
351    /// ```
352    /// use marctk::Field;
353    ///
354    /// let mut field: Field =  Field::new("245").unwrap();
355    /// field.add_subfield("a", "First one").unwrap();
356    /// field.add_subfield("a", "Second one").unwrap();
357    ///
358    /// for mut subfield in field.get_subfields_mut("a") {
359    ///   subfield.set_content(subfield.content().to_uppercase());
360    /// }
361    ///
362    /// assert_eq!(field.first_subfield("a").unwrap().content(), "FIRST ONE");
363    /// ```
364    pub fn get_subfields_mut(&mut self, code: &str) -> Vec<&mut Subfield> {
365        self.subfields
366            .iter_mut()
367            .filter(|f| f.code() == code)
368            .collect()
369    }
370
371    /// Adds a new Subfield to this field using the provided code and content.
372    ///
373    /// * `code` - Must have the correct byte count.
374    pub fn add_subfield(
375        &mut self,
376        code: impl Into<String>,
377        content: impl Into<String>,
378    ) -> Result<(), String> {
379        self.subfields.push(Subfield::new(code, content)?);
380        Ok(())
381    }
382
383    /// Remove the first subfield with the specified code.
384    ///
385    /// # Examples
386    ///
387    /// ```
388    /// use marctk::Field;
389    ///
390    /// let mut field: Field =  Field::new("245").unwrap();
391    /// field.add_subfield("a", "First one").unwrap();
392    /// field.add_subfield("a", "Second one").unwrap();
393    /// assert_eq!(field.subfields().len(), 2);
394    ///
395    /// assert_eq!(field.remove_first_subfield("a").unwrap().content(), "First one");
396    /// assert_eq!(field.subfields().len(), 1);
397    /// assert_eq!(field.first_subfield("a").unwrap().content(), "Second one");
398    /// ```
399    pub fn remove_first_subfield(&mut self, code: &str) -> Option<Subfield> {
400        if let Some(index) = self.subfields.iter().position(|s| s.code.eq(code)) {
401            return Some(self.subfields.remove(index));
402        }
403
404        None
405    }
406
407    /// Remove all subfields with the specified code and returns
408    /// the count of removed subfields.
409    ///
410    /// # Examples
411    ///
412    /// ```
413    /// use marctk::Field;
414    /// let mut field = Field::new("505").unwrap();
415    /// let _ = field.add_subfield("t", "Chapter 1 /");
416    /// let _ = field.add_subfield("r", "Cool author --");
417    /// let _ = field.add_subfield("t", "Chapter 2.");
418    /// assert_eq!(field.subfields().len(), 3);
419    ///
420    /// assert_eq!(field.remove_subfields("t"), 2);
421    ///
422    /// assert_eq!(field.subfields().len(), 1);
423    /// ```
424    pub fn remove_subfields(&mut self, code: &str) -> usize {
425        let mut removed = 0;
426
427        while let Some(index) = self.subfields.iter().position(|s| s.code.eq(code)) {
428            self.subfields.remove(index);
429            removed += 1;
430        }
431
432        removed
433    }
434
435    /// # Examples
436    ///
437    /// ```
438    /// use marctk::Field;
439    /// let field = Field::new("505").unwrap();
440    /// assert!(field.matches_spec("505"));
441    /// assert!(field.matches_spec("5xx"));
442    /// assert!(field.matches_spec("50x"));
443    /// assert!(field.matches_spec("5x5"));
444    /// assert!(field.matches_spec("x05"));
445    /// assert!(field.matches_spec("5XX"));
446    ///
447    /// assert!(!field.matches_spec("6xx"));
448    /// assert!(!field.matches_spec("LDR"));
449    /// assert!(!field.matches_spec("invalid spec"));
450    /// ```
451    pub fn matches_spec(&self, spec: &str) -> bool {
452        if spec.len() != 3 {
453            return false;
454        };
455        spec.chars()
456            .zip(self.tag().chars())
457            .all(|(spec_char, tag_char)| {
458                spec_char.eq_ignore_ascii_case(&'x') || spec_char == tag_char
459            })
460    }
461}
462
463/// A MARC record with leader, control fields, and data fields.
464#[derive(Debug, Clone, PartialEq)]
465pub struct Record {
466    leader: String,
467    control_fields: Vec<Controlfield>,
468    fields: Vec<Field>,
469}
470
471impl Default for Record {
472    fn default() -> Self {
473        Self::new()
474    }
475}
476
477impl Record {
478    /// Create a new Record with a default leader and no content.
479    pub fn new() -> Self {
480        Record {
481            leader: DEFAULT_LEADER.to_string(),
482            control_fields: Vec::new(),
483            fields: Vec::new(),
484        }
485    }
486
487    /// Get the leader as a string.
488    pub fn leader(&self) -> &str {
489        &self.leader
490    }
491
492    /// Apply a leader value.
493    ///
494    /// Returns Err if the value is not composed of the correct number
495    /// of bytes.
496    ///
497    /// # Examples
498    ///
499    /// ```
500    /// use marctk::Record;
501    /// let mut record = Record::default();
502    /// assert!(record.set_leader("too short").is_err());
503    /// assert!(record.set_leader("just right              ").is_ok());
504    /// ```
505    pub fn set_leader(&mut self, leader: impl Into<String>) -> Result<(), String> {
506        let leader = leader.into();
507        check_byte_count(&leader, LEADER_SIZE)?;
508        self.leader = leader;
509        Ok(())
510    }
511
512    /// Apply a leader value from a set of bytes
513    ///
514    /// Returns Err if the value is not composed of the correct number
515    /// of bytes.
516    ///
517    /// # Examples
518    ///
519    /// ```
520    /// use marctk::Record;
521    /// let mut record = Record::default();
522    /// assert!(record.set_leader_bytes("too short".as_bytes()).is_err());
523    /// assert!(record.set_leader_bytes("just right              ".as_bytes()).is_ok());
524    /// ```
525    pub fn set_leader_bytes(&mut self, bytes: &[u8]) -> Result<(), String> {
526        let s = std::str::from_utf8(bytes)
527            .map_err(|e| format!("Leader is not a valid UTF-8 string: {e} bytes={bytes:?}"))?;
528        self.set_leader(s)
529    }
530
531    /// Get the full list of control fields.
532    pub fn control_fields(&self) -> &Vec<Controlfield> {
533        &self.control_fields
534    }
535    /// Get the full list of control fields, mutable.
536    pub fn control_fields_mut(&mut self) -> &mut Vec<Controlfield> {
537        &mut self.control_fields
538    }
539    /// Get the full list of fields.
540    pub fn fields(&self) -> &Vec<Field> {
541        &self.fields
542    }
543    /// Get the full list of fields, mutable.
544    pub fn fields_mut(&mut self) -> &mut Vec<Field> {
545        &mut self.fields
546    }
547
548    /// Return a list of control fields with the provided tag.
549    pub fn get_control_fields(&self, tag: &str) -> Vec<&Controlfield> {
550        self.control_fields
551            .iter()
552            .filter(|f| f.tag() == tag)
553            .collect()
554    }
555
556    /// Return a list of fields with the provided tag.
557    pub fn get_fields(&self, tag: &str) -> Vec<&Field> {
558        self.fields.iter().filter(|f| f.tag() == tag).collect()
559    }
560
561    /// Return a mutable list of fields with the provided tag.
562    pub fn get_fields_mut(&mut self, tag: &str) -> Vec<&mut Field> {
563        self.fields.iter_mut().filter(|f| f.tag() == tag).collect()
564    }
565
566    /// Add a new control field with the provided tag and content and
567    /// insert it in tag order.
568    ///
569    /// Controlfields are those with tag 001 .. 009
570    ///
571    /// Err if the tag is invalid.
572    ///
573    /// # Examples
574    ///
575    /// ```
576    /// use marctk::Record;
577    /// let mut record = Record::default();
578    /// assert!(record.add_control_field("011", "foo").is_err());
579    /// assert!(record.add_control_field("002", "bar").is_ok());
580    /// assert!(record.add_control_field("001", "bar").is_ok());
581    ///
582    /// // should be sorted by tag.
583    /// assert_eq!(record.control_fields()[0].tag(), "001");
584    /// ```
585    pub fn add_control_field(&mut self, tag: &str, content: &str) -> Result<(), String> {
586        self.insert_control_field(Controlfield::new(tag, content)?);
587        Ok(())
588    }
589
590    /// Insert a [`Controlfield`] in tag order.
591    pub fn insert_control_field(&mut self, field: Controlfield) {
592        if let Some(idx) = self
593            .control_fields()
594            .iter()
595            .position(|f| f.tag() > field.tag())
596        {
597            self.control_fields_mut().insert(idx, field);
598        } else {
599            self.control_fields_mut().push(field);
600        }
601    }
602
603    /// Insert a [`Field`] in tag order
604    pub fn insert_data_field(&mut self, field: Field) -> usize {
605        if let Some(idx) = self.fields().iter().position(|f| f.tag() > field.tag()) {
606            self.fields_mut().insert(idx, field);
607            idx
608        } else {
609            self.fields_mut().push(field);
610            0
611        }
612    }
613
614    /// Create a new Field with the provided tag, insert it into the
615    /// record in tag order, then return a mut ref to the new field.
616    ///
617    /// # Examples
618    ///
619    /// ```
620    /// use marctk::Record;
621    /// let mut record = Record::default();
622    /// assert!(record.add_data_field("245").is_ok());
623    /// assert!(record.add_data_field("240").is_ok());
624    /// assert!(record.add_data_field("1234").is_err());
625    ///
626    /// assert_eq!(record.fields()[0].tag(), "240");
627    /// ```
628    pub fn add_data_field(&mut self, tag: impl Into<String>) -> Result<&mut Field, String> {
629        let pos = self.insert_data_field(Field::new(tag)?);
630        Ok(self.fields_mut().get_mut(pos).unwrap())
631    }
632
633    /// Returns a list of values for the specified tag and subfield.
634    ///
635    /// # Examples
636    ///
637    /// ```
638    /// use marctk::Record;
639    /// let mut record = Record::default();
640    /// let field = record.add_data_field("650").expect("added field");
641    /// field.add_subfield("a", "foo");
642    /// field.add_subfield("a", "bar");
643    ///
644    /// let field = record.add_data_field("650").expect("added field");
645    /// field.add_subfield("a", "baz");
646    ///
647    /// let values = record.get_field_values("650", "a");
648    ///
649    /// assert_eq!(values.len(), 3);
650    /// assert_eq!(values[1], "bar");
651    /// ```
652    pub fn get_field_values(&self, tag: &str, sfcode: &str) -> Vec<&str> {
653        let mut vec = Vec::new();
654        for field in self.get_fields(tag) {
655            for sf in field.get_subfields(sfcode) {
656                vec.push(sf.content.as_str());
657            }
658        }
659        vec
660    }
661
662    /// Remove all occurrences of control fields with the provided tag.
663    ///
664    /// # Examples
665    ///
666    /// ```
667    /// use marctk::Record;
668    /// let mut record = Record::default();
669    /// let _ = record.add_control_field("008", "stuffandsuch").unwrap();
670    /// let _ = record.add_control_field("008", "morestuffandsuch").unwrap();
671    ///
672    /// assert_eq!(record.get_control_fields("008").len(), 2);
673    ///
674    /// record.remove_control_fields("007");
675    /// assert_eq!(record.get_control_fields("008").len(), 2);
676    ///
677    /// record.remove_control_fields("008");
678    /// assert!(record.get_fields("008").is_empty());
679    /// ```
680    pub fn remove_control_fields(&mut self, tag: &str) {
681        while let Some(pos) = self.control_fields.iter().position(|f| f.tag() == tag) {
682            self.control_fields.remove(pos);
683        }
684    }
685
686    /// Remove all occurrences of fields with the provided tag.
687    ///
688    /// # Examples
689    ///
690    /// ```
691    /// use marctk::Record;
692    /// let mut record = Record::default();
693    /// let field = record.add_data_field("650").unwrap();
694    /// field.add_subfield("a", "Art");
695    /// field.add_subfield("a", "Science");
696    ///
697    /// assert_eq!(record.get_fields("650").len(), 1);
698    ///
699    /// record.remove_fields("200");
700    /// assert_eq!(record.get_fields("650").len(), 1);
701    ///
702    /// record.remove_fields("650");
703    /// assert!(record.get_fields("650").is_empty());
704    /// ```
705    pub fn remove_fields(&mut self, tag: &str) {
706        while let Some(pos) = self.fields.iter().position(|f| f.tag() == tag) {
707            self.fields.remove(pos);
708        }
709    }
710
711    /// Extract MARC fields using a range of tags or a specification
712    /// inspired by [ruby-marc](https://github.com/ruby-marc/ruby-marc/),
713    /// [SolrMarc](https://github.com/solrmarc/solrmarc/wiki/Basic-field-based-extraction-specifications),
714    /// and [traject](https://github.com/traject/traject).
715    ///
716    /// # Specification syntax
717    ///
718    /// * A three-character tag will match any field that has that tag, for example `650` would
719    ///   only match fields with the tag `650`.
720    /// * The letter `x` (or upper case `X`) can be used as a wildcard, for example `2xx` would
721    ///   match any field with a tag that starts with the character `2`.
722    /// * Multiple specifications can be combined with a `:` between them, for example
723    ///   `4xx:52x:901` would match any field with tag `901` or a tag that begins with
724    ///   `4` or `52`.
725    ///
726    /// # Examples
727    ///
728    /// ```
729    /// use marctk::Record;
730    /// let record = Record::from_breaker(
731    ///     r#"=600 10$aZhang, Heng, $d 78-139 $v Juvenile literature.
732    /// =650 \0$aEarthquakes $v Juvenile literature.
733    /// =955 \0$a1234"#
734    /// ).unwrap();
735    ///
736    /// let mut some_fields = record.extract_fields(600..=699);
737    /// assert_eq!(some_fields.next().unwrap().tag(), "600");
738    /// assert_eq!(some_fields.next().unwrap().tag(), "650");
739    /// assert!(some_fields.next().is_none());
740    ///
741    /// let mut more_fields = record.extract_fields("9xx");
742    /// assert_eq!(more_fields.next().unwrap().tag(), "955");
743    /// assert!(more_fields.next().is_none());
744    ///
745    /// let mut you_can_combine_specs = record.extract_fields("600:9xx");
746    /// assert_eq!(you_can_combine_specs.next().unwrap().tag(), "600");
747    /// assert_eq!(you_can_combine_specs.next().unwrap().tag(), "955");
748    /// assert!(you_can_combine_specs.next().is_none());
749    /// ```
750    pub fn extract_fields(
751        &self,
752        query: impl Into<crate::query::FieldQuery>,
753    ) -> impl Iterator<Item = &Field> {
754        self.fields().iter().filter(query.into().field_filter)
755    }
756
757    /// Mutable variant of [`extract_fields()`].
758    ///
759    /// # Examples
760    ///
761    /// ```
762    /// use marctk::Record;
763    /// let mut record = Record::from_breaker(
764    ///     r#"=600 10$aZhang, Heng, $d 78-139 $v Juvenile literature.
765    /// =650 \0$aEarthquakes $v Juvenile literature.
766    /// =955 \0$a1234"#
767    /// ).unwrap();
768    ///
769    /// for field in record.extract_fields_mut(600..=699) {
770    ///     field.first_subfield_mut("a").unwrap().set_content("HELLOOO");
771    ///     field.add_subfield("x", "X CONTENT");
772    /// }
773    ///
774    /// // This is kinda lazy, but you get the idea.
775    /// assert!(record.to_breaker().contains("$xX CONTENT"));
776    /// ```
777    pub fn extract_fields_mut(
778        &mut self,
779        query: impl Into<crate::query::FieldQueryMut>,
780    ) -> impl Iterator<Item = &mut Field> {
781        self.fields_mut()
782            .iter_mut()
783            .filter(query.into().field_filter)
784    }
785
786    /// Extract only certain desired subfields from fields using a specification
787    /// inspired by [ruby-marc](https://github.com/ruby-marc/ruby-marc/),
788    /// [SolrMarc](https://github.com/solrmarc/solrmarc/wiki/Basic-field-based-extraction-specifications),
789    /// and [traject](https://github.com/traject/traject).
790    ///
791    /// # Specification syntax
792    ///
793    /// * A three-character tag will match any field that has that tag, for example `650` would
794    ///   only match fields with the tag `650`.
795    /// * The letter `x` (or upper case `X`) can be used as a wildcard, for example `2xx` would
796    ///   match any field with a tag that starts with the character `2`.
797    /// * Tags are optionally followed by indicators in parentheses.  For example, `650(00)` would
798    ///   match fields with first and second indicators equal to zero.  `650(**)` would match
799    ///   fields with any indicators, which is the same as omitting the indicators from the
800    ///   specification altogether.  `650(_0)` and `650( 0)` both match fields with an empty
801    ///   first indicator, although you may find using the `_` version clearer.
802    /// * Each specification can optionally end with a list of subfield codes.  For example,
803    ///   `245abc` would match 245 fields and select only subfields `a`, `b`, and `c`.
804    /// * Multiple specifications can be combined with a `:` between them, for example
805    ///   `60x(*0)a:650av:653` would select
806    ///     * subfield `a` from any field that begins with `60` and has second indicator `0`, and
807    ///     * subfields `a` and `v` from any field with tag `650`, and
808    ///     * any subfield from any field with tag `653`
809    ///
810    /// Returns an iterator over fields.  You can call the `subfields()` method on the result
811    /// to iterate through the requested subfields.
812    ///
813    /// # Examples
814    ///
815    /// ```
816    /// use marctk::Record;
817    /// let record = Record::from_breaker(
818    ///     r#"=600 10$aZhang, Heng, $d 78-139 $v Juvenile literature.
819    /// =650 \0$aEarthquakes $v Juvenile literature.
820    /// =955 \0$a1234"#
821    /// ).unwrap();
822    ///
823    /// let fields = record.extract_partial_fields("600a");
824    /// assert_eq!(fields.len(), 1);
825    ///
826    /// let field = fields.first().unwrap();
827    /// assert_eq!(field.tag(), "600");
828    /// assert_eq!(field.subfields().len(), 1);
829    /// assert_eq!(field.subfields()[0].code(), "a");
830    /// ```
831    ///
832    /// An example with indicators specified:
833    ///
834    /// ```
835    /// use marctk::Record;
836    /// let record = Record::from_breaker(
837    ///     r#"=650 \0$aEarthquakes $v Juvenile literature.
838    ///=650 \0$aEarthquake damage $v Juvenile literature.
839    ///=650 \4$aNon-LCSH term"#
840    /// ).unwrap();
841    ///
842    /// let fields = record.extract_partial_fields("650(*0)a");
843    /// let terms: Vec<_> = fields.into_iter().map(|f|f.first_subfield("a").unwrap().content().to_string()).collect();
844    /// assert_eq!(terms, vec!["Earthquakes ", "Earthquake damage "]);
845    /// ```
846    ///
847    /// An example with multiple specifications (which means more potential
848    /// matching fields and subfields):
849    ///
850    /// ```
851    /// use marctk::Record;
852    /// let record = Record::from_breaker(
853    ///     r#"=600 10$aZhang, Heng, $d 78-139 $v Juvenile literature.
854    ///=650 \0$aAmusement parks $vComic books, strips, etc.
855    ///=655 \7$aHorror comics. $2lcgft
856    ///=655 \7$aGraphic novels. $2lcgft"#
857    /// ).unwrap();
858    ///
859    /// let genre_query = "600(*0)vx:650(*0)vx:655(*0)avx:655(*7)avx";
860    /// let fields = record.extract_partial_fields(genre_query);
861    /// let matching_subfields = fields.iter().fold(Vec::new(), |mut accumulator, field| {
862    ///    accumulator.extend(field.subfields());
863    ///    accumulator
864    /// });
865    /// let terms: Vec<&str> = matching_subfields.iter().map(|sf| sf.content()).collect();
866    /// assert_eq!(
867    ///     terms,
868    ///     vec![" Juvenile literature.", "Comic books, strips, etc.", "Horror comics. ", "Graphic novels. "]
869    /// );
870    /// ```
871    pub fn extract_partial_fields(&self, query: &str) -> Vec<Field> {
872        let specs: Vec<ComplexSpecification> =
873            query.split(':').map(ComplexSpecification::from).collect();
874        let matching_fields = self
875            .fields()
876            .iter()
877            .filter(|f| specs.iter().any(|spec| spec.matches_field(f)));
878        matching_fields
879            .map(|field| {
880                let mut new_field = field.clone();
881                new_field
882                    .subfields_mut()
883                    .retain(|sf| specs.iter().any(|spec| spec.subfield_filter()(sf, field)));
884                new_field
885            })
886            .collect()
887    }
888
889    /// Extract only certain desired subfields from fields using a specification.
890    /// See [`extract_partial_fields`] for the details of the specification syntax.
891    ///
892    /// # Examples
893    ///
894    /// ```
895    /// use marctk::Record;
896    /// let record = Record::from_breaker(
897    ///     r#"=600 10$aZhang, Heng, $d 78-139 $v Juvenile literature.
898    ///=650 \0$aAmusement parks $vComic books, strips, etc.
899    ///=655 \7$aHorror comics. $2lcgft
900    ///=655 \7$aGraphic novels. $2lcgft"#
901    /// ).unwrap();
902    ///
903    /// let genre_query = "600(*0)vx:650(*0)vx:655(*0)avx:655(*7)avx";
904    /// let values = record.extract_values(genre_query);
905    /// assert_eq!(
906    ///     values,
907    ///     vec![" Juvenile literature.", "Comic books, strips, etc.", "Horror comics. ", "Graphic novels. "]
908    /// );
909    /// ```
910    ///
911    /// [`extract_partial_fields`]: crate::Record::extract_partial_fields
912    pub fn extract_values(&self, query: &str) -> Vec<String> {
913        self.extract_partial_fields(query)
914            .iter()
915            .fold(Vec::new(), |mut accumulator, field| {
916                accumulator.extend(field.subfields());
917                accumulator
918            })
919            .iter()
920            .map(|sf| sf.content().to_owned())
921            .collect()
922    }
923}