1use super::Controlfield;
3use super::Field;
4use super::Record;
5use super::Subfield;
6use std::fs::File;
7use std::io::prelude::*;
8
/// Byte written after the directory and after every field.
const END_OF_FIELD: u8 = 0x1E;
/// Byte written as the very last byte of a record.
const END_OF_RECORD: u8 = 0x1D;
/// Width, in ASCII digits, of the record length stored at the start of the leader.
const RECORD_SIZE_ENTRY: usize = 5;
/// Number of bytes in the leader.
const LEADER_SIZE: usize = 24;
/// Position within the leader of the "start of field data" offset.
const DATA_OFFSET_START: usize = 12;
/// Width, in ASCII digits, of a data offset (leader and directory entries).
const DATA_OFFSET_SIZE: usize = 5;
/// Width, in ASCII digits, of a field length inside a directory entry.
const DATA_LENGTH_SIZE: usize = 4;
/// Number of bytes in one directory entry (tag + length + offset).
const DIRECTORY_ENTRY_LEN: usize = 12;
/// Delimiter that precedes every subfield inside a data field.
const SUBFIELD_SEPARATOR: &str = "\x1F";
/// Largest record size that fits in the leader's record length digits.
const MAX_RECORD_BYTES: usize = 99999;
19
20pub struct BinaryRecordIterator {
22 file: File,
23}
24
25impl Iterator for BinaryRecordIterator {
26 type Item = Result<Record, String>;
27
28 fn next(&mut self) -> Option<Self::Item> {
30 let mut bytes: Vec<u8> = Vec::new();
31
32 loop {
33 let mut buf: [u8; 1] = [0];
37 match self.file.read(&mut buf) {
38 Ok(count) => {
39 if count == 1 {
40 bytes.push(buf[0]);
41 if buf[0] == END_OF_RECORD {
42 break;
43 }
44 } else {
45 break; }
47 }
48 Err(e) => {
49 return Some(Err(format!("Error reading file: {:?} {}", self.file, e)));
50 }
51 }
52 }
53
54 if !bytes.is_empty() {
55 match Record::from_binary(bytes.as_slice()) {
56 Ok(r) => return Some(Ok(r)),
57 Err(e) => return Some(Err(format!("Error processing bytes: {:?} {}", bytes, e))),
58 }
59 }
60
61 None
62 }
63}
64
65impl BinaryRecordIterator {
66 fn from_file(filename: &str) -> Result<Self, String> {
68 let file = match File::open(filename) {
69 Ok(f) => f,
70 Err(e) => return Err(format!("Cannot read MARC file: {filename} {e}")),
71 };
72
73 Ok(BinaryRecordIterator { file })
74 }
75}
76
/// Interprets `bytes` as UTF-8 text and parses that text as a `usize`.
///
/// # Errors
///
/// Returns a message when the bytes are not valid UTF-8 or when the
/// resulting text cannot be parsed as a `usize`.
pub fn bytes_to_usize(bytes: &[u8]) -> Result<usize, String> {
    let text = std::str::from_utf8(bytes)
        .map_err(|e| format!("Error translating bytes to string: {bytes:?} {e}"))?;

    text.parse::<usize>()
        .map_err(|e| format!("Error translating string to usize str={text} {e}"))
}
97
98pub struct DirectoryEntry {
101 tag: String,
102 field_start_idx: usize,
103 field_end_idx: usize,
104}
105
106impl DirectoryEntry {
107 pub fn new(which: usize, data_start_idx: usize, dir_bytes: &[u8]) -> Result<Self, String> {
119 let start = which * DIRECTORY_ENTRY_LEN;
120 let end = start + DIRECTORY_ENTRY_LEN;
121 let bytes = &dir_bytes[start..end];
122
123 let entry_str = match std::str::from_utf8(bytes) {
124 Ok(s) => s,
125 Err(e) => return Err(format!("Invalid directory bytes: {:?} {}", bytes, e)),
126 };
127
128 let field_tag = &entry_str[0..3];
129 let field_len_str = &entry_str[3..7];
130 let field_pos_str = &entry_str[7..12];
131
132 let field_len = match field_len_str.parse::<usize>() {
133 Ok(l) => l,
134 Err(e) => return Err(format!("Invalid data length value {} {}", field_len_str, e)),
135 };
136
137 let field_start_idx = match field_pos_str.parse::<usize>() {
139 Ok(l) => l,
140 Err(e) => {
141 return Err(format!(
142 "Invalid data position value {} {}",
143 field_pos_str, e
144 ));
145 }
146 };
147
148 let start = field_start_idx + data_start_idx;
149 let last = start + field_len - 1; Ok(DirectoryEntry {
152 tag: field_tag.to_string(),
153 field_start_idx: start,
154 field_end_idx: last,
155 })
156 }
157}
158
159impl Record {
160 pub fn from_binary_file(filename: &str) -> Result<BinaryRecordIterator, String> {
162 BinaryRecordIterator::from_file(filename)
163 }
164
165 pub fn from_binary(rec_bytes: &[u8]) -> Result<Record, String> {
172 let mut record = Record::new();
173
174 let rec_byte_count = rec_bytes.len();
175
176 if rec_byte_count < LEADER_SIZE {
177 return Err(format!("Binary record is too short: {:?}", rec_bytes));
178 }
179
180 let leader_bytes = &rec_bytes[0..LEADER_SIZE];
181
182 let size_bytes = &leader_bytes[0..RECORD_SIZE_ENTRY];
184
185 let rec_size = bytes_to_usize(size_bytes)?;
187
188 if rec_byte_count != rec_size {
189 return Err(format!(
190 "Record has incorrect size reported={} real={}",
191 rec_size, rec_byte_count
192 ));
193 }
194
195 record.set_leader_bytes(leader_bytes)?;
196
197 let data_offset_bytes =
199 &leader_bytes[DATA_OFFSET_START..(DATA_OFFSET_START + DATA_OFFSET_SIZE)];
200
201 let data_start_idx = bytes_to_usize(data_offset_bytes)?;
202
203 let dir_bytes = &rec_bytes[LEADER_SIZE..(data_start_idx - 1)];
206
207 let dir_len = dir_bytes.len();
209 if dir_len == 0 || dir_len % DIRECTORY_ENTRY_LEN != 0 {
210 return Err(format!("Invalid directory length {}", dir_len));
211 }
212
213 let dir_count = dir_bytes.len() / DIRECTORY_ENTRY_LEN;
215 let mut dir_idx = 0;
216
217 while dir_idx < dir_count {
218 let dir_entry = DirectoryEntry::new(dir_idx, data_start_idx, dir_bytes)?;
219
220 if let Err(e) = record.process_directory_entry(rec_bytes, rec_byte_count, &dir_entry) {
221 return Err(format!(
222 "Error processing directory entry index={} {}",
223 dir_idx, e
224 ));
225 }
226
227 dir_idx += 1;
228 }
229
230 Ok(record)
231 }
232
233 fn process_directory_entry(
240 &mut self,
241 rec_bytes: &[u8], rec_byte_count: usize, dir_entry: &DirectoryEntry,
244 ) -> Result<(), String> {
245 if (dir_entry.field_end_idx) >= rec_byte_count {
246 return Err(format!(
247 "Field length exceeds length of record for tag={}",
248 dir_entry.tag
249 ));
250 }
251
252 let field_bytes = &rec_bytes[dir_entry.field_start_idx..dir_entry.field_end_idx];
254
255 let field_str = match std::str::from_utf8(field_bytes) {
257 Ok(s) => s,
258 Err(e) => {
259 return Err(format!(
260 "Field data is not UTF-8 compatible: {:?} {}",
261 field_bytes, e
262 ));
263 }
264 };
265
266 if dir_entry.tag.as_str() < "010" {
267 let content = if !field_str.is_empty() { field_str } else { "" };
268
269 let cf = Controlfield::new(&dir_entry.tag, content)?;
270 self.control_fields_mut().push(cf);
271 return Ok(());
272 }
273
274 let mut field = Field::new(&dir_entry.tag)?;
278
279 field.set_ind1(&field_str[..1])?;
280 field.set_ind2(&field_str[1..2])?;
281
282 let field_parts: Vec<&str> = field_str.split(SUBFIELD_SEPARATOR).collect();
285
286 for part in &field_parts[1..] {
287 let sf = Subfield::new(&part[..1], if part.len() > 1 { &part[1..] } else { "" })?;
289 field.subfields_mut().push(sf);
290 }
291
292 self.fields_mut().push(field);
293
294 Ok(())
295 }
296
297 pub fn to_binary(&self) -> Result<Vec<u8>, String> {
314 let mut bytes: Vec<u8> = Vec::new();
315
316 bytes.append(&mut self.leader().as_bytes().to_vec());
317
318 let num_dirs = self.build_directory(&mut bytes);
320
321 bytes.push(END_OF_FIELD);
323
324 self.add_data_fields(&mut bytes);
325
326 bytes.push(END_OF_RECORD);
328
329 self.sync_leader(num_dirs, &mut bytes)?;
331
332 Ok(bytes)
333 }
334
335 fn build_directory(&self, bytes: &mut Vec<u8>) -> usize {
341 let mut num_dirs = 0;
342 let mut prev_end_idx = 0;
343
344 for field in self.control_fields() {
345 num_dirs += 1;
346
347 let mut field_len = field.content().len();
348
349 field_len += 1; let s = format!(
353 "{}{:0w1$}{:0w2$}",
354 field.tag(),
355 field_len,
356 prev_end_idx, w1 = DATA_LENGTH_SIZE,
358 w2 = DATA_OFFSET_SIZE
359 );
360
361 bytes.append(&mut s.as_bytes().to_vec());
362
363 prev_end_idx += field_len;
364 }
365
366 for field in self.fields() {
367 num_dirs += 1;
368
369 let mut field_len = 3; for sf in field.subfields() {
371 field_len += 2; field_len += sf.content().len();
373 }
374
375 let s = format!(
377 "{}{:0w1$}{:0w2$}",
378 field.tag(),
379 field_len,
380 prev_end_idx, w1 = DATA_LENGTH_SIZE,
382 w2 = DATA_OFFSET_SIZE
383 );
384
385 bytes.append(&mut s.as_bytes().to_vec());
386
387 prev_end_idx += field_len;
388 }
389
390 num_dirs
391 }
392
393 fn add_data_fields(&self, bytes: &mut Vec<u8>) {
395 for field in self.control_fields() {
397 bytes.append(&mut field.content().as_bytes().to_vec());
398 bytes.push(END_OF_FIELD);
399 }
400
401 for field in self.fields() {
402 let s = format!("{}{}", field.ind1(), field.ind2());
403 bytes.append(&mut s.as_bytes().to_vec());
404
405 for sf in field.subfields() {
406 let s = format!("{}{}{}", SUBFIELD_SEPARATOR, sf.code(), sf.content());
407 bytes.append(&mut s.as_bytes().to_vec());
408 }
409
410 bytes.push(END_OF_FIELD);
411 }
412 }
413
414 fn sync_leader(&self, num_dirs: usize, bytes: &mut [u8]) -> Result<(), String> {
417 let blen = bytes.len();
418
419 if blen > MAX_RECORD_BYTES {
420 return Err(format!(
421 "MARC byte count {blen} too large for binary encoding"
422 ));
423 }
424
425 let size_str = format!("{:0w$}", blen, w = RECORD_SIZE_ENTRY);
426 let size_bytes = size_str.as_bytes();
427
428 bytes[0..RECORD_SIZE_ENTRY].copy_from_slice(size_bytes);
429
430 let data_start_idx = LEADER_SIZE + (num_dirs * DIRECTORY_ENTRY_LEN) + 1; let data_start_str = format!("{:0w$}", data_start_idx, w = DATA_OFFSET_SIZE);
433
434 let dstart = DATA_OFFSET_START;
435 let dend = dstart + DATA_OFFSET_SIZE;
436
437 bytes[dstart..dend].copy_from_slice(data_start_str.as_bytes());
438
439 Ok(())
440 }
441}