marctk/record.rs
1//! Base MARC record model and associated components.
2
3use crate::query::ComplexSpecification;
/// Number of bytes in a field tag.
const TAG_SIZE: usize = 3;
/// Number of bytes in a record leader.
const LEADER_SIZE: usize = 24;
/// Number of bytes in a subfield code or an indicator.
const CODE_SIZE: usize = 1;
/// Leader given to newly created records: `LEADER_SIZE` blank bytes, so a
/// fresh record satisfies the same length rule `Record::set_leader` enforces.
const DEFAULT_LEADER: &str = "                        ";
/// Indicator value reported when no indicator has been set on a field.
const DEFAULT_INDICATOR: &str = " ";

// Compile-time guard: the default leader must always have the required size.
const _: () = assert!(DEFAULT_LEADER.len() == LEADER_SIZE);
9
/// Confirms that `s` occupies exactly `len` bytes.
///
/// The comparison uses the UTF-8 byte length of the string, not its
/// character count, so a multi-byte character counts more than once.
///
/// Returns `Err` with a descriptive message when the lengths differ.
fn check_byte_count(s: &str, len: usize) -> Result<(), String> {
    match s.len() {
        byte_len if byte_len == len => Ok(()),
        byte_len => Err(format!(
            "Invalid byte count for string s={s} wanted={len} found={byte_len}"
        )),
    }
}
20
21/// MARC Control Field whose tag value is < "010"
22#[derive(Debug, Clone, PartialEq)]
23pub struct Controlfield {
24 tag: String,
25 content: String,
26}
27
28impl Controlfield {
29 /// Create a Controlfield with the provided tag and content.
30 ///
31 /// * `tag` - Must have the correct byte count.
32 ///
33 /// # Examples
34 ///
35 /// ```
36 /// let control_field = marctk::Controlfield::new("008", "12345").unwrap();
37 /// assert_eq!(control_field.tag(), "008");
38 /// ```
39 /// ```
40 /// let control_field = marctk::Controlfield::new("010", "12345");
41 ///
42 /// assert_eq!(control_field.is_err(), true);
43 /// assert_eq!(control_field.unwrap_err(), "Invalid Controlfield tag: 010");
44 /// ```
45 pub fn new(tag: impl Into<String>, content: impl Into<String>) -> Result<Self, String> {
46 let tag = tag.into();
47 check_byte_count(&tag, TAG_SIZE)?;
48
49 if tag.as_str() < "000" || tag.as_str() > "009" {
50 return Err(format!("Invalid Controlfield tag: {tag}"));
51 }
52
53 Ok(Controlfield {
54 tag,
55 content: content.into(),
56 })
57 }
58
59 /// Get the tag
60 ///
61 /// # Examples
62 ///
63 /// ```
64 /// use marctk::Controlfield;
65 ///
66 /// let control_field = Controlfield::new("008", "12345").unwrap();
67 /// assert_eq!(control_field.tag(), "008");
68 /// ```
69 pub fn tag(&self) -> &str {
70 &self.tag
71 }
72
73 /// Get the content
74 ///
75 /// # Examples
76 ///
77 /// ```
78 /// use marctk::Controlfield;
79 ///
80 /// let control_field = Controlfield::new("008", "12345").unwrap();
81 /// assert_eq!(control_field.content(), "12345");
82 /// ```
83 pub fn content(&self) -> &str {
84 &self.content
85 }
86
87 /// Set the Controlfield content.
88 ///
89 /// # Examples
90 ///
91 /// ```
92 /// use marctk::Controlfield;
93 ///
94 /// let mut control_field = Controlfield::new("008", "12345").unwrap();
95 /// control_field.set_content("6789");
96 /// assert_eq!(control_field.content(), "6789");
97 /// ```
98 pub fn set_content(&mut self, content: impl Into<String>) {
99 self.content = content.into();
100 }
101}
102
103/// A single subfield code + value pair
104#[derive(Debug, Clone, PartialEq)]
105pub struct Subfield {
106 code: String,
107 content: String,
108}
109
110impl Subfield {
111 /// Create a Subfield with the provided code and content.
112 ///
113 /// * `code` - Must have the correct byte count.
114 ///
115 /// # Examples
116 ///
117 /// ```
118 /// use marctk::Subfield;
119 /// let subfield: Subfield = match Subfield::new("a", "Στη σκιά της πεταλούδας") {
120 /// Ok(sf) => sf,
121 /// Err(e) => panic!("Subfield::new() failed with: {}", e),
122 /// };
123 /// assert_eq!(subfield.content(), "Στη σκιά της πεταλούδας");
124 /// ```
125 ///
126 /// ```should_panic
127 /// use marctk::Subfield;
128 /// Subfield::new("🦋", "Στη σκιά της πεταλούδας").unwrap();
129 /// ```
130 ///
131 pub fn new(code: impl Into<String>, content: impl Into<String>) -> Result<Self, String> {
132 let code = code.into();
133 check_byte_count(&code, CODE_SIZE)?;
134 Ok(Subfield {
135 code,
136 content: content.into(),
137 })
138 }
139 /// Get the Subfield content.
140 pub fn content(&self) -> &str {
141 &self.content
142 }
143 /// Set the Subfield content.
144 ///
145 /// # Examples
146 ///
147 /// ```
148 /// use marctk::Subfield;
149 /// let mut subfield: Subfield = Subfield::new("a", "potato").unwrap();
150 /// subfield.set_content("cheese");
151 /// assert_eq!(subfield.content(), "cheese");
152 /// ```
153 ///
154 pub fn set_content(&mut self, content: impl Into<String>) {
155 self.content = content.into();
156 }
157 /// Get the Subfield code.
158 ///
159 /// # Examples
160 ///
161 /// ```
162 /// use marctk::Subfield;
163 /// let subfield: Subfield = Subfield::new("a", "potato").unwrap();
164 /// assert_eq!(subfield.code(), "a");
165 /// ```
166 ///
167 pub fn code(&self) -> &str {
168 &self.code
169 }
170 /// Set the Subfield code.
171 ///
172 /// # Examples
173 ///
174 /// ```
175 /// use marctk::Subfield;
176 /// let mut subfield: Subfield = Subfield::new("a", "potato").unwrap();
177 /// subfield.set_code("q");
178 /// assert_eq!(subfield.code(), "q");
179 /// ```
180 ///
181 /// ```should_panic
182 /// use marctk::Subfield;
183 /// let mut subfield: Subfield = Subfield::new("a", "potato").unwrap();
184 /// subfield.set_code("🥔").unwrap();
185 /// ```
186 ///
187 pub fn set_code(&mut self, code: impl Into<String>) -> Result<(), String> {
188 let code: String = code.into();
189 check_byte_count(&code, CODE_SIZE)?;
190 self.code = code;
191 Ok(())
192 }
193}
194
195/// A MARC Data Field with tag, indicators, and subfields.
196#[derive(Debug, Clone, PartialEq)]
197pub struct Field {
198 tag: String,
199 ind1: Option<String>,
200 ind2: Option<String>,
201 subfields: Vec<Subfield>,
202}
203
204impl Field {
205 /// Create a Field with the provided tag.
206 ///
207 /// * `tag` - Must have the correct byte count.
208 ///
209 /// # Examples
210 ///
211 /// ```
212 /// use marctk::Field;
213 ///
214 /// let field: Field = match Field::new("245") {
215 /// Ok(f) => f,
216 /// Err(e) => panic!("Field::new() failed with: {}", e),
217 /// };
218 /// assert_eq!(field.tag(), "245");
219 /// assert_eq!(field.ind1(), " ");
220 /// assert_eq!(field.ind2(), " ");
221 /// assert_eq!(field.subfields().len(), 0);
222 /// ```
223 ///
224 pub fn new(tag: impl Into<String>) -> Result<Self, String> {
225 let tag = tag.into();
226 check_byte_count(&tag, TAG_SIZE)?;
227
228 if tag.as_str() < "010" || tag.as_str() > "999" {
229 // Of note, OCLC sometimes creates MARC records with data
230 // fields using the tag "DAT". For our purposes, the only
231 // thing that really matters is the byte count (checked
232 // above), so just warn for unexpected tags.
233 eprintln!("Unexpected tag for data field: '{tag}'");
234 }
235
236 Ok(Field {
237 tag,
238 ind1: None,
239 ind2: None,
240 subfields: Vec::new(),
241 })
242 }
243 /// Get the tag
244 pub fn tag(&self) -> &str {
245 &self.tag
246 }
247 /// Get the value of indicator-1, defaulting to DEFAULT_INDICATOR.
248 pub fn ind1(&self) -> &str {
249 self.ind1.as_deref().unwrap_or(DEFAULT_INDICATOR)
250 }
251 /// Get the value of indicator-2, defaulting to DEFAULT_INDICATOR.
252 pub fn ind2(&self) -> &str {
253 self.ind2.as_deref().unwrap_or(DEFAULT_INDICATOR)
254 }
255 /// Get the full list of subfields
256 pub fn subfields(&self) -> &Vec<Subfield> {
257 &self.subfields
258 }
259 /// Get a mutable list of subfields.
260 pub fn subfields_mut(&mut self) -> &mut Vec<Subfield> {
261 &mut self.subfields
262 }
263
264 /// Set the indicator-1 value.
265 ///
266 /// * `ind` - Must have the correct byte count.
267 pub fn set_ind1(&mut self, ind: impl Into<String>) -> Result<(), String> {
268 let ind = ind.into();
269 check_byte_count(&ind, CODE_SIZE)?;
270 self.ind1 = Some(ind);
271 Ok(())
272 }
273
274 /// Set the indicator-2 value.
275 ///
276 /// * `ind` - Must have the correct byte count.
277 pub fn set_ind2(&mut self, ind: impl Into<String>) -> Result<(), String> {
278 let ind = ind.into();
279 check_byte_count(&ind, CODE_SIZE)?;
280 self.ind2 = Some(ind);
281 Ok(())
282 }
283
284 /// Get a list of subfields with the provided code.
285 pub fn get_subfields(&self, code: &str) -> Vec<&Subfield> {
286 self.subfields.iter().filter(|f| f.code() == code).collect()
287 }
288
289 /// Get the first occurrence of the subfield with the provided code,
290 /// if one is present.
291 ///
292 /// # Examples
293 ///
294 /// ```
295 /// use marctk::Field;
296 ///
297 /// let mut field: Field = Field::new("245").unwrap();
298 /// assert!(field.first_subfield("a").is_none());
299 ///
300 /// field.add_subfield("a", "First one").unwrap();
301 /// field.add_subfield("a", "Second one").unwrap();
302 ///
303 /// assert_eq!(field.first_subfield("a").unwrap().content(), "First one");
304 /// ```
305 pub fn first_subfield(&self, code: &str) -> Option<&Subfield> {
306 self.subfields.iter().find(|f| f.code() == code)
307 }
308
309 /// Mutable variant of ['first_subfield()`].
310 ///
311 /// # Examples
312 ///
313 /// ```
314 /// use marctk::Field;
315 ///
316 /// let mut field: Field = Field::new("245").unwrap();
317 /// assert!(field.first_subfield("a").is_none());
318 ///
319 /// field.add_subfield("a", "First one").unwrap();
320 ///
321 /// field.first_subfield_mut("a").unwrap().set_content("Other text");
322 ///
323 /// assert_eq!(field.first_subfield("a").unwrap().content(), "Other text");
324 /// ```
325 pub fn first_subfield_mut(&mut self, code: &str) -> Option<&mut Subfield> {
326 self.subfields.iter_mut().find(|f| f.code() == code)
327 }
328
329 /// True if a subfield with the provided code is present.
330 ///
331 /// # Examples
332 ///
333 /// ```
334 /// use marctk::Field;
335 ///
336 /// let mut field: Field = Field::new("245").unwrap();
337 /// assert!(!field.has_subfield("a"));
338 ///
339 /// field.add_subfield("a", "My title").unwrap();
340 ///
341 /// assert!(field.has_subfield("a"));
342 /// ```
343 pub fn has_subfield(&self, code: &str) -> bool {
344 self.subfields.iter().any(|f| f.code() == code)
345 }
346
347 /// Get a mutable list of subfields with the provided code.
348 ///
349 /// # Examples
350 ///
351 /// ```
352 /// use marctk::Field;
353 ///
354 /// let mut field: Field = Field::new("245").unwrap();
355 /// field.add_subfield("a", "First one").unwrap();
356 /// field.add_subfield("a", "Second one").unwrap();
357 ///
358 /// for mut subfield in field.get_subfields_mut("a") {
359 /// subfield.set_content(subfield.content().to_uppercase());
360 /// }
361 ///
362 /// assert_eq!(field.first_subfield("a").unwrap().content(), "FIRST ONE");
363 /// ```
364 pub fn get_subfields_mut(&mut self, code: &str) -> Vec<&mut Subfield> {
365 self.subfields
366 .iter_mut()
367 .filter(|f| f.code() == code)
368 .collect()
369 }
370
371 /// Adds a new Subfield to this field using the provided code and content.
372 ///
373 /// * `code` - Must have the correct byte count.
374 pub fn add_subfield(
375 &mut self,
376 code: impl Into<String>,
377 content: impl Into<String>,
378 ) -> Result<(), String> {
379 self.subfields.push(Subfield::new(code, content)?);
380 Ok(())
381 }
382
383 /// Remove the first subfield with the specified code.
384 ///
385 /// # Examples
386 ///
387 /// ```
388 /// use marctk::Field;
389 ///
390 /// let mut field: Field = Field::new("245").unwrap();
391 /// field.add_subfield("a", "First one").unwrap();
392 /// field.add_subfield("a", "Second one").unwrap();
393 /// assert_eq!(field.subfields().len(), 2);
394 ///
395 /// assert_eq!(field.remove_first_subfield("a").unwrap().content(), "First one");
396 /// assert_eq!(field.subfields().len(), 1);
397 /// assert_eq!(field.first_subfield("a").unwrap().content(), "Second one");
398 /// ```
399 pub fn remove_first_subfield(&mut self, code: &str) -> Option<Subfield> {
400 if let Some(index) = self.subfields.iter().position(|s| s.code.eq(code)) {
401 return Some(self.subfields.remove(index));
402 }
403
404 None
405 }
406
407 /// Remove all subfields with the specified code and returns
408 /// the count of removed subfields.
409 ///
410 /// # Examples
411 ///
412 /// ```
413 /// use marctk::Field;
414 /// let mut field = Field::new("505").unwrap();
415 /// let _ = field.add_subfield("t", "Chapter 1 /");
416 /// let _ = field.add_subfield("r", "Cool author --");
417 /// let _ = field.add_subfield("t", "Chapter 2.");
418 /// assert_eq!(field.subfields().len(), 3);
419 ///
420 /// assert_eq!(field.remove_subfields("t"), 2);
421 ///
422 /// assert_eq!(field.subfields().len(), 1);
423 /// ```
424 pub fn remove_subfields(&mut self, code: &str) -> usize {
425 let mut removed = 0;
426
427 while let Some(index) = self.subfields.iter().position(|s| s.code.eq(code)) {
428 self.subfields.remove(index);
429 removed += 1;
430 }
431
432 removed
433 }
434
435 /// # Examples
436 ///
437 /// ```
438 /// use marctk::Field;
439 /// let field = Field::new("505").unwrap();
440 /// assert!(field.matches_spec("505"));
441 /// assert!(field.matches_spec("5xx"));
442 /// assert!(field.matches_spec("50x"));
443 /// assert!(field.matches_spec("5x5"));
444 /// assert!(field.matches_spec("x05"));
445 /// assert!(field.matches_spec("5XX"));
446 ///
447 /// assert!(!field.matches_spec("6xx"));
448 /// assert!(!field.matches_spec("LDR"));
449 /// assert!(!field.matches_spec("invalid spec"));
450 /// ```
451 pub fn matches_spec(&self, spec: &str) -> bool {
452 if spec.len() != 3 {
453 return false;
454 };
455 spec.chars()
456 .zip(self.tag().chars())
457 .all(|(spec_char, tag_char)| {
458 spec_char.eq_ignore_ascii_case(&'x') || spec_char == tag_char
459 })
460 }
461}
462
463/// A MARC record with leader, control fields, and data fields.
464#[derive(Debug, Clone, PartialEq)]
465pub struct Record {
466 leader: String,
467 control_fields: Vec<Controlfield>,
468 fields: Vec<Field>,
469}
470
471impl Default for Record {
472 fn default() -> Self {
473 Self::new()
474 }
475}
476
477impl Record {
478 /// Create a new Record with a default leader and no content.
479 pub fn new() -> Self {
480 Record {
481 leader: DEFAULT_LEADER.to_string(),
482 control_fields: Vec::new(),
483 fields: Vec::new(),
484 }
485 }
486
487 /// Get the leader as a string.
488 pub fn leader(&self) -> &str {
489 &self.leader
490 }
491
492 /// Apply a leader value.
493 ///
494 /// Returns Err if the value is not composed of the correct number
495 /// of bytes.
496 ///
497 /// # Examples
498 ///
499 /// ```
500 /// use marctk::Record;
501 /// let mut record = Record::default();
502 /// assert!(record.set_leader("too short").is_err());
503 /// assert!(record.set_leader("just right ").is_ok());
504 /// ```
505 pub fn set_leader(&mut self, leader: impl Into<String>) -> Result<(), String> {
506 let leader = leader.into();
507 check_byte_count(&leader, LEADER_SIZE)?;
508 self.leader = leader;
509 Ok(())
510 }
511
512 /// Apply a leader value from a set of bytes
513 ///
514 /// Returns Err if the value is not composed of the correct number
515 /// of bytes.
516 ///
517 /// # Examples
518 ///
519 /// ```
520 /// use marctk::Record;
521 /// let mut record = Record::default();
522 /// assert!(record.set_leader_bytes("too short".as_bytes()).is_err());
523 /// assert!(record.set_leader_bytes("just right ".as_bytes()).is_ok());
524 /// ```
525 pub fn set_leader_bytes(&mut self, bytes: &[u8]) -> Result<(), String> {
526 let s = std::str::from_utf8(bytes)
527 .map_err(|e| format!("Leader is not a valid UTF-8 string: {e} bytes={bytes:?}"))?;
528 self.set_leader(s)
529 }
530
531 /// Get the full list of control fields.
532 pub fn control_fields(&self) -> &Vec<Controlfield> {
533 &self.control_fields
534 }
535 /// Get the full list of control fields, mutable.
536 pub fn control_fields_mut(&mut self) -> &mut Vec<Controlfield> {
537 &mut self.control_fields
538 }
539 /// Get the full list of fields.
540 pub fn fields(&self) -> &Vec<Field> {
541 &self.fields
542 }
543 /// Get the full list of fields, mutable.
544 pub fn fields_mut(&mut self) -> &mut Vec<Field> {
545 &mut self.fields
546 }
547
548 /// Return a list of control fields with the provided tag.
549 pub fn get_control_fields(&self, tag: &str) -> Vec<&Controlfield> {
550 self.control_fields
551 .iter()
552 .filter(|f| f.tag() == tag)
553 .collect()
554 }
555
556 /// Return a list of fields with the provided tag.
557 pub fn get_fields(&self, tag: &str) -> Vec<&Field> {
558 self.fields.iter().filter(|f| f.tag() == tag).collect()
559 }
560
561 /// Return a mutable list of fields with the provided tag.
562 pub fn get_fields_mut(&mut self, tag: &str) -> Vec<&mut Field> {
563 self.fields.iter_mut().filter(|f| f.tag() == tag).collect()
564 }
565
566 /// Add a new control field with the provided tag and content and
567 /// insert it in tag order.
568 ///
569 /// Controlfields are those with tag 001 .. 009
570 ///
571 /// Err if the tag is invalid.
572 ///
573 /// # Examples
574 ///
575 /// ```
576 /// use marctk::Record;
577 /// let mut record = Record::default();
578 /// assert!(record.add_control_field("011", "foo").is_err());
579 /// assert!(record.add_control_field("002", "bar").is_ok());
580 /// assert!(record.add_control_field("001", "bar").is_ok());
581 ///
582 /// // should be sorted by tag.
583 /// assert_eq!(record.control_fields()[0].tag(), "001");
584 /// ```
585 pub fn add_control_field(&mut self, tag: &str, content: &str) -> Result<(), String> {
586 self.insert_control_field(Controlfield::new(tag, content)?);
587 Ok(())
588 }
589
590 /// Insert a [`Controlfield`] in tag order.
591 pub fn insert_control_field(&mut self, field: Controlfield) {
592 if let Some(idx) = self
593 .control_fields()
594 .iter()
595 .position(|f| f.tag() > field.tag())
596 {
597 self.control_fields_mut().insert(idx, field);
598 } else {
599 self.control_fields_mut().push(field);
600 }
601 }
602
603 /// Insert a [`Field`] in tag order
604 pub fn insert_data_field(&mut self, field: Field) -> usize {
605 if let Some(idx) = self.fields().iter().position(|f| f.tag() > field.tag()) {
606 self.fields_mut().insert(idx, field);
607 idx
608 } else {
609 self.fields_mut().push(field);
610 0
611 }
612 }
613
614 /// Create a new Field with the provided tag, insert it into the
615 /// record in tag order, then return a mut ref to the new field.
616 ///
617 /// # Examples
618 ///
619 /// ```
620 /// use marctk::Record;
621 /// let mut record = Record::default();
622 /// assert!(record.add_data_field("245").is_ok());
623 /// assert!(record.add_data_field("240").is_ok());
624 /// assert!(record.add_data_field("1234").is_err());
625 ///
626 /// assert_eq!(record.fields()[0].tag(), "240");
627 /// ```
628 pub fn add_data_field(&mut self, tag: impl Into<String>) -> Result<&mut Field, String> {
629 let pos = self.insert_data_field(Field::new(tag)?);
630 Ok(self.fields_mut().get_mut(pos).unwrap())
631 }
632
633 /// Returns a list of values for the specified tag and subfield.
634 ///
635 /// # Examples
636 ///
637 /// ```
638 /// use marctk::Record;
639 /// let mut record = Record::default();
640 /// let field = record.add_data_field("650").expect("added field");
641 /// field.add_subfield("a", "foo");
642 /// field.add_subfield("a", "bar");
643 ///
644 /// let field = record.add_data_field("650").expect("added field");
645 /// field.add_subfield("a", "baz");
646 ///
647 /// let values = record.get_field_values("650", "a");
648 ///
649 /// assert_eq!(values.len(), 3);
650 /// assert_eq!(values[1], "bar");
651 /// ```
652 pub fn get_field_values(&self, tag: &str, sfcode: &str) -> Vec<&str> {
653 let mut vec = Vec::new();
654 for field in self.get_fields(tag) {
655 for sf in field.get_subfields(sfcode) {
656 vec.push(sf.content.as_str());
657 }
658 }
659 vec
660 }
661
662 /// Remove all occurrences of control fields with the provided tag.
663 ///
664 /// # Examples
665 ///
666 /// ```
667 /// use marctk::Record;
668 /// let mut record = Record::default();
669 /// let _ = record.add_control_field("008", "stuffandsuch").unwrap();
670 /// let _ = record.add_control_field("008", "morestuffandsuch").unwrap();
671 ///
672 /// assert_eq!(record.get_control_fields("008").len(), 2);
673 ///
674 /// record.remove_control_fields("007");
675 /// assert_eq!(record.get_control_fields("008").len(), 2);
676 ///
677 /// record.remove_control_fields("008");
678 /// assert!(record.get_fields("008").is_empty());
679 /// ```
680 pub fn remove_control_fields(&mut self, tag: &str) {
681 while let Some(pos) = self.control_fields.iter().position(|f| f.tag() == tag) {
682 self.control_fields.remove(pos);
683 }
684 }
685
686 /// Remove all occurrences of fields with the provided tag.
687 ///
688 /// # Examples
689 ///
690 /// ```
691 /// use marctk::Record;
692 /// let mut record = Record::default();
693 /// let field = record.add_data_field("650").unwrap();
694 /// field.add_subfield("a", "Art");
695 /// field.add_subfield("a", "Science");
696 ///
697 /// assert_eq!(record.get_fields("650").len(), 1);
698 ///
699 /// record.remove_fields("200");
700 /// assert_eq!(record.get_fields("650").len(), 1);
701 ///
702 /// record.remove_fields("650");
703 /// assert!(record.get_fields("650").is_empty());
704 /// ```
705 pub fn remove_fields(&mut self, tag: &str) {
706 while let Some(pos) = self.fields.iter().position(|f| f.tag() == tag) {
707 self.fields.remove(pos);
708 }
709 }
710
711 /// Extract MARC fields using a range of tags or a specification
712 /// inspired by [ruby-marc](https://github.com/ruby-marc/ruby-marc/),
713 /// [SolrMarc](https://github.com/solrmarc/solrmarc/wiki/Basic-field-based-extraction-specifications),
714 /// and [traject](https://github.com/traject/traject).
715 ///
716 /// # Specification syntax
717 ///
718 /// * A three-character tag will match any field that has that tag, for example `650` would
719 /// only match fields with the tag `650`.
720 /// * The letter `x` (or upper case `X`) can be used as a wildcard, for example `2xx` would
721 /// match any field with a tag that starts with the character `2`.
722 /// * Multiple specifications can be combined with a `:` between them, for example
723 /// `4xx:52x:901` would match any field with tag `901` or a tag that begins with
724 /// `4` or `52`.
725 ///
726 /// # Examples
727 ///
728 /// ```
729 /// use marctk::Record;
730 /// let record = Record::from_breaker(
731 /// r#"=600 10$aZhang, Heng, $d 78-139 $v Juvenile literature.
732 /// =650 \0$aEarthquakes $v Juvenile literature.
733 /// =955 \0$a1234"#
734 /// ).unwrap();
735 ///
736 /// let mut some_fields = record.extract_fields(600..=699);
737 /// assert_eq!(some_fields.next().unwrap().tag(), "600");
738 /// assert_eq!(some_fields.next().unwrap().tag(), "650");
739 /// assert!(some_fields.next().is_none());
740 ///
741 /// let mut more_fields = record.extract_fields("9xx");
742 /// assert_eq!(more_fields.next().unwrap().tag(), "955");
743 /// assert!(more_fields.next().is_none());
744 ///
745 /// let mut you_can_combine_specs = record.extract_fields("600:9xx");
746 /// assert_eq!(you_can_combine_specs.next().unwrap().tag(), "600");
747 /// assert_eq!(you_can_combine_specs.next().unwrap().tag(), "955");
748 /// assert!(you_can_combine_specs.next().is_none());
749 /// ```
750 pub fn extract_fields(
751 &self,
752 query: impl Into<crate::query::FieldQuery>,
753 ) -> impl Iterator<Item = &Field> {
754 self.fields().iter().filter(query.into().field_filter)
755 }
756
757 /// Mutable variant of [`extract_fields()`].
758 ///
759 /// # Examples
760 ///
761 /// ```
762 /// use marctk::Record;
763 /// let mut record = Record::from_breaker(
764 /// r#"=600 10$aZhang, Heng, $d 78-139 $v Juvenile literature.
765 /// =650 \0$aEarthquakes $v Juvenile literature.
766 /// =955 \0$a1234"#
767 /// ).unwrap();
768 ///
769 /// for field in record.extract_fields_mut(600..=699) {
770 /// field.first_subfield_mut("a").unwrap().set_content("HELLOOO");
771 /// field.add_subfield("x", "X CONTENT");
772 /// }
773 ///
774 /// // This is kinda lazy, but you get the idea.
775 /// assert!(record.to_breaker().contains("$xX CONTENT"));
776 /// ```
777 pub fn extract_fields_mut(
778 &mut self,
779 query: impl Into<crate::query::FieldQueryMut>,
780 ) -> impl Iterator<Item = &mut Field> {
781 self.fields_mut()
782 .iter_mut()
783 .filter(query.into().field_filter)
784 }
785
786 /// Extract only certain desired subfields from fields using a specification
787 /// inspired by [ruby-marc](https://github.com/ruby-marc/ruby-marc/),
788 /// [SolrMarc](https://github.com/solrmarc/solrmarc/wiki/Basic-field-based-extraction-specifications),
789 /// and [traject](https://github.com/traject/traject).
790 ///
791 /// # Specification syntax
792 ///
793 /// * A three-character tag will match any field that has that tag, for example `650` would
794 /// only match fields with the tag `650`.
795 /// * The letter `x` (or upper case `X`) can be used as a wildcard, for example `2xx` would
796 /// match any field with a tag that starts with the character `2`.
797 /// * Tags are optionally followed by indicators in parentheses. For example, `650(00)` would
798 /// match fields with first and second indicators equal to zero. `650(**)` would match
799 /// fields with any indicators, which is the same as omitting the indicators from the
800 /// specification altogether. `650(_0)` and `650( 0)` both match fields with an empty
801 /// first indicator, although you may find using the `_` version clearer.
802 /// * Each specification can optionally end with a list of subfield codes. For example,
803 /// `245abc` would match 245 fields and select only subfields `a`, `b`, and `c`.
804 /// * Multiple specifications can be combined with a `:` between them, for example
805 /// `60x(*0)a:650av:653` would select
806 /// * subfield `a` from any field that begins with `60` and has second indicator `0`, and
807 /// * subfields `a` and `v` from any field with tag `650`, and
808 /// * any subfield from any field with tag `653`
809 ///
810 /// Returns an iterator over fields. You can call the `subfields()` method on the result
811 /// to iterate through the requested subfields.
812 ///
813 /// # Examples
814 ///
815 /// ```
816 /// use marctk::Record;
817 /// let record = Record::from_breaker(
818 /// r#"=600 10$aZhang, Heng, $d 78-139 $v Juvenile literature.
819 /// =650 \0$aEarthquakes $v Juvenile literature.
820 /// =955 \0$a1234"#
821 /// ).unwrap();
822 ///
823 /// let fields = record.extract_partial_fields("600a");
824 /// assert_eq!(fields.len(), 1);
825 ///
826 /// let field = fields.first().unwrap();
827 /// assert_eq!(field.tag(), "600");
828 /// assert_eq!(field.subfields().len(), 1);
829 /// assert_eq!(field.subfields()[0].code(), "a");
830 /// ```
831 ///
832 /// An example with indicators specified:
833 ///
834 /// ```
835 /// use marctk::Record;
836 /// let record = Record::from_breaker(
837 /// r#"=650 \0$aEarthquakes $v Juvenile literature.
838 ///=650 \0$aEarthquake damage $v Juvenile literature.
839 ///=650 \4$aNon-LCSH term"#
840 /// ).unwrap();
841 ///
842 /// let fields = record.extract_partial_fields("650(*0)a");
843 /// let terms: Vec<_> = fields.into_iter().map(|f|f.first_subfield("a").unwrap().content().to_string()).collect();
844 /// assert_eq!(terms, vec!["Earthquakes ", "Earthquake damage "]);
845 /// ```
846 ///
847 /// An example with multiple specifications (which means more potential
848 /// matching fields and subfields):
849 ///
850 /// ```
851 /// use marctk::Record;
852 /// let record = Record::from_breaker(
853 /// r#"=600 10$aZhang, Heng, $d 78-139 $v Juvenile literature.
854 ///=650 \0$aAmusement parks $vComic books, strips, etc.
855 ///=655 \7$aHorror comics. $2lcgft
856 ///=655 \7$aGraphic novels. $2lcgft"#
857 /// ).unwrap();
858 ///
859 /// let genre_query = "600(*0)vx:650(*0)vx:655(*0)avx:655(*7)avx";
860 /// let fields = record.extract_partial_fields(genre_query);
861 /// let matching_subfields = fields.iter().fold(Vec::new(), |mut accumulator, field| {
862 /// accumulator.extend(field.subfields());
863 /// accumulator
864 /// });
865 /// let terms: Vec<&str> = matching_subfields.iter().map(|sf| sf.content()).collect();
866 /// assert_eq!(
867 /// terms,
868 /// vec![" Juvenile literature.", "Comic books, strips, etc.", "Horror comics. ", "Graphic novels. "]
869 /// );
870 /// ```
871 pub fn extract_partial_fields(&self, query: &str) -> Vec<Field> {
872 let specs: Vec<ComplexSpecification> =
873 query.split(':').map(ComplexSpecification::from).collect();
874 let matching_fields = self
875 .fields()
876 .iter()
877 .filter(|f| specs.iter().any(|spec| spec.matches_field(f)));
878 matching_fields
879 .map(|field| {
880 let mut new_field = field.clone();
881 new_field
882 .subfields_mut()
883 .retain(|sf| specs.iter().any(|spec| spec.subfield_filter()(sf, field)));
884 new_field
885 })
886 .collect()
887 }
888
889 /// Extract only certain desired subfields from fields using a specification.
890 /// See [`extract_partial_fields`] for the details of the specification syntax.
891 ///
892 /// # Examples
893 ///
894 /// ```
895 /// use marctk::Record;
896 /// let record = Record::from_breaker(
897 /// r#"=600 10$aZhang, Heng, $d 78-139 $v Juvenile literature.
898 ///=650 \0$aAmusement parks $vComic books, strips, etc.
899 ///=655 \7$aHorror comics. $2lcgft
900 ///=655 \7$aGraphic novels. $2lcgft"#
901 /// ).unwrap();
902 ///
903 /// let genre_query = "600(*0)vx:650(*0)vx:655(*0)avx:655(*7)avx";
904 /// let values = record.extract_values(genre_query);
905 /// assert_eq!(
906 /// values,
907 /// vec![" Juvenile literature.", "Comic books, strips, etc.", "Horror comics. ", "Graphic novels. "]
908 /// );
909 /// ```
910 ///
911 /// [`extract_partial_fields`]: crate::Record::extract_partial_fields
912 pub fn extract_values(&self, query: &str) -> Vec<String> {
913 self.extract_partial_fields(query)
914 .iter()
915 .fold(Vec::new(), |mut accumulator, field| {
916 accumulator.extend(field.subfields());
917 accumulator
918 })
919 .iter()
920 .map(|sf| sf.content().to_owned())
921 .collect()
922 }
923}