From fec26926a83de45ff0ec6870608b9828d20fdc8d Mon Sep 17 00:00:00 2001 From: Thomas Eizinger Date: Thu, 25 Feb 2021 11:20:47 +1100 Subject: [PATCH] Squashed 'tokio-tar/' content from commit 43dd166 git-subtree-dir: tokio-tar git-subtree-split: 43dd166d0f3aff67891cd1c1bf4d6bfb984bb789 --- .github/workflows/main.yml | 49 + .gitignore | 2 + Cargo.toml | 44 + LICENSE-APACHE | 201 ++ LICENSE-MIT | 25 + README.md | 97 + examples/extract_file.rs | 28 + examples/list.rs | 21 + examples/raw_list.rs | 54 + examples/write.rs | 16 + src/archive.rs | 610 +++++++ src/builder.rs | 633 +++++++ src/entry.rs | 955 ++++++++++ src/entry_type.rs | 189 ++ src/error.rs | 40 + src/header.rs | 1620 +++++++++++++++++ src/lib.rs | 45 + src/pax.rs | 88 + tests/all.rs | 1117 ++++++++++++ tests/archives/directory.tar | Bin 0 -> 10240 bytes tests/archives/duplicate_dirs.tar | Bin 0 -> 2048 bytes tests/archives/empty_filename.tar | Bin 0 -> 512 bytes tests/archives/file_times.tar | Bin 0 -> 1536 bytes tests/archives/link.tar | Bin 0 -> 10240 bytes tests/archives/pax.tar | Bin 0 -> 10240 bytes tests/archives/pax2.tar | Bin 0 -> 10240 bytes tests/archives/reading_files.tar | Bin 0 -> 10240 bytes tests/archives/simple.tar | Bin 0 -> 10240 bytes tests/archives/simple_missing_last_header.tar | Bin 0 -> 9728 bytes tests/archives/spaces.tar | Bin 0 -> 2048 bytes tests/archives/sparse.tar | Bin 0 -> 10240 bytes tests/archives/xattrs.tar | Bin 0 -> 10240 bytes tests/entry.rs | 350 ++++ tests/header/mod.rs | 243 +++ 34 files changed, 6427 insertions(+) create mode 100644 .github/workflows/main.yml create mode 100644 .gitignore create mode 100644 Cargo.toml create mode 100644 LICENSE-APACHE create mode 100644 LICENSE-MIT create mode 100644 README.md create mode 100644 examples/extract_file.rs create mode 100644 examples/list.rs create mode 100644 examples/raw_list.rs create mode 100644 examples/write.rs create mode 100644 src/archive.rs create mode 100644 src/builder.rs create mode 100644 src/entry.rs 
create mode 100644 src/entry_type.rs create mode 100644 src/error.rs create mode 100644 src/header.rs create mode 100644 src/lib.rs create mode 100644 src/pax.rs create mode 100644 tests/all.rs create mode 100644 tests/archives/directory.tar create mode 100644 tests/archives/duplicate_dirs.tar create mode 100644 tests/archives/empty_filename.tar create mode 100644 tests/archives/file_times.tar create mode 100644 tests/archives/link.tar create mode 100644 tests/archives/pax.tar create mode 100644 tests/archives/pax2.tar create mode 100644 tests/archives/reading_files.tar create mode 100644 tests/archives/simple.tar create mode 100644 tests/archives/simple_missing_last_header.tar create mode 100644 tests/archives/spaces.tar create mode 100644 tests/archives/sparse.tar create mode 100644 tests/archives/xattrs.tar create mode 100644 tests/entry.rs create mode 100644 tests/header/mod.rs diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml new file mode 100644 index 00000000..e546e533 --- /dev/null +++ b/.github/workflows/main.yml @@ -0,0 +1,49 @@ +on: [push, pull_request] + +name: Continuous integration + +jobs: + ci: + runs-on: ${{ matrix.os }} + strategy: + matrix: + os: [ubuntu-latest, windows-latest, macOS-latest] + + steps: + - uses: actions/checkout@v2 + + - uses: actions-rs/toolchain@v1 + with: + profile: minimal + toolchain: stable + override: true + components: rustfmt, clippy + + - uses: actions-rs/cargo@v1 + with: + command: fmt + args: --all -- --check + + - uses: actions-rs/cargo@v1 + with: + command: install + args: cargo-hack + + - uses: actions-rs/cargo@v1 + with: + command: hack + args: check --all --ignore-private --each-feature --no-dev-deps + + - uses: actions-rs/cargo@v1 + with: + command: check + args: --all --all-targets --all-features + + - uses: actions-rs/cargo@v1 + with: + command: test + + - uses: actions-rs/cargo@v1 + with: + command: clippy + args: -- -D warnings diff --git a/.gitignore b/.gitignore new file mode 100644 
index 00000000..4fffb2f8 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +/target +/Cargo.lock diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 00000000..4c6b935f --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,44 @@ +[package] +name = "tokio-tar" +version = "0.2.0" +authors = [ + "Alex Crichton ", + "dignifiedquire ", + "Artem Vorotnikov ", + "Aiden McClelland ", +] +homepage = "https://github.com/vorot93/tokio-tar" +repository = "https://github.com/vorot93/tokio-tar" +documentation = "https://docs.rs/tokio-tar" +license = "MIT/Apache-2.0" +keywords = ["tar", "tarfile", "encoding"] +readme = "README.md" +edition = "2018" +exclude = ["tests/archives/*"] + +description = """ +A Rust implementation of an async TAR file reader and writer. This library does not +currently handle compression, but it is abstract over all I/O readers and +writers. Additionally, great lengths are taken to ensure that the entire +contents are never required to be entirely resident in memory all at once. +""" + +[dependencies] +filetime = "0.2.13" +futures-core = "0.3" +tokio = { version = "1.0.1", features = ["fs", "io-util", "rt"] } +tokio-stream = "0.1.1" + +[dev-dependencies] +tempfile = "3" +tokio = { version = "1.0.1", features = ["full"] } + +[target."cfg(unix)".dependencies] +xattr = { version = "0.2", optional = true } +libc = "0.2" + +[target.'cfg(target_os = "redox")'.dependencies] +redox_syscall = "0.2" + +[features] +default = ["xattr"] diff --git a/LICENSE-APACHE b/LICENSE-APACHE new file mode 100644 index 00000000..16fe87b0 --- /dev/null +++ b/LICENSE-APACHE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. 
+ + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + +2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + +4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the 
following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + +8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + +Copyright [yyyy] [name of copyright owner] + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. diff --git a/LICENSE-MIT b/LICENSE-MIT new file mode 100644 index 00000000..39e0ed66 --- /dev/null +++ b/LICENSE-MIT @@ -0,0 +1,25 @@ +Copyright (c) 2014 Alex Crichton + +Permission is hereby granted, free of charge, to any +person obtaining a copy of this software and associated +documentation files (the "Software"), to deal in the +Software without restriction, including without +limitation the rights to use, copy, modify, merge, +publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software +is furnished to do so, subject to the following +conditions: + +The above copyright notice and this permission notice +shall be included in all copies or substantial portions +of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF +ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED +TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT +SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR +IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. diff --git a/README.md b/README.md new file mode 100644 index 00000000..dc8c2436 --- /dev/null +++ b/README.md @@ -0,0 +1,97 @@ +

tokio-tar

+
+ + A tar archive reading/writing library for async Rust. + +
+ +
+ +
+ + + Crates.io version + + + + Download + + + + docs.rs docs + +
+ + +
+ +> Based on the great [tar-rs](https://github.com/alexcrichton/tar-rs). + +## Reading an archive + +```rust,no_run +use tokio::io::stdin; +use tokio_stream::*; + +use tokio_tar::Archive; + +fn main() { + tokio::runtime::Runtime::new().unwrap().block_on(async { + let mut ar = Archive::new(stdin()); + let mut entries = ar.entries().unwrap(); + while let Some(file) = entries.next().await { + let f = file.unwrap(); + println!("{}", f.path().unwrap().display()); + } + }); +} +``` + +## Writing an archive + +```rust,no_run +use tokio::fs::File; +use tokio_tar::Builder; + +fn main() { + tokio::runtime::Runtime::new().unwrap().block_on(async { + let file = File::create("foo.tar").await.unwrap(); + let mut a = Builder::new(file); + + a.append_path("README.md").await.unwrap(); + a.append_file("lib.rs", &mut File::open("src/lib.rs").await.unwrap()) + .await + .unwrap(); + }); +} +``` + +# License + +This project is licensed under either of + + * Apache License, Version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or + http://www.apache.org/licenses/LICENSE-2.0) + * MIT license ([LICENSE-MIT](LICENSE-MIT) or + http://opensource.org/licenses/MIT) + +at your option. + +### Contribution + +Unless you explicitly state otherwise, any contribution intentionally submitted +for inclusion in this project by you, as defined in the Apache-2.0 license, +shall be dual licensed as above, without any additional terms or conditions. diff --git a/examples/extract_file.rs b/examples/extract_file.rs new file mode 100644 index 00000000..ba414bb1 --- /dev/null +++ b/examples/extract_file.rs @@ -0,0 +1,28 @@ +//! An example of extracting a file in an archive. +//! +//! Takes a tarball on standard input, looks for an entry with a listed file +//! name as the first argument provided, and then prints the contents of that +//! file to stdout. 
+ +extern crate tokio_tar as async_tar; + +use std::{env::args_os, path::Path}; +use tokio::io::{copy, stdin, stdout}; +use tokio_stream::*; + +use async_tar::Archive; + +fn main() { + tokio::runtime::Runtime::new().unwrap().block_on(async { + let first_arg = args_os().nth(1).unwrap(); + let filename = Path::new(&first_arg); + let mut ar = Archive::new(stdin()); + let mut entries = ar.entries().unwrap(); + while let Some(file) = entries.next().await { + let mut f = file.unwrap(); + if f.path().unwrap() == filename { + copy(&mut f, &mut stdout()).await.unwrap(); + } + } + }); +} diff --git a/examples/list.rs b/examples/list.rs new file mode 100644 index 00000000..b05e29ec --- /dev/null +++ b/examples/list.rs @@ -0,0 +1,21 @@ +//! An example of listing the file names of entries in an archive. +//! +//! Takes a tarball on stdin and prints out all of the entries inside. + +extern crate tokio_tar as async_tar; + +use tokio::io::stdin; +use tokio_stream::*; + +use async_tar::Archive; + +fn main() { + tokio::runtime::Runtime::new().unwrap().block_on(async { + let mut ar = Archive::new(stdin()); + let mut entries = ar.entries().unwrap(); + while let Some(file) = entries.next().await { + let f = file.unwrap(); + println!("{}", f.path().unwrap().display()); + } + }); +} diff --git a/examples/raw_list.rs b/examples/raw_list.rs new file mode 100644 index 00000000..27e06152 --- /dev/null +++ b/examples/raw_list.rs @@ -0,0 +1,54 @@ +//! An example of listing raw entries in an archive. +//! +//! Takes a tarball on stdin and prints out all of the entries inside. 
+ +extern crate tokio_tar as async_tar; + +use tokio::io::stdin; +use tokio_stream::*; + +use async_tar::Archive; + +fn main() { + tokio::runtime::Runtime::new().unwrap().block_on(async { + let mut ar = Archive::new(stdin()); + let mut i = 0; + let mut entries = ar.entries_raw().unwrap(); + while let Some(file) = entries.next().await { + println!("-------------------------- Entry {}", i); + let mut f = file.unwrap(); + println!("path: {}", f.path().unwrap().display()); + println!("size: {}", f.header().size().unwrap()); + println!("entry size: {}", f.header().entry_size().unwrap()); + println!("link name: {:?}", f.link_name().unwrap()); + println!("file type: {:#x}", f.header().entry_type().as_byte()); + println!("mode: {:#o}", f.header().mode().unwrap()); + println!("uid: {}", f.header().uid().unwrap()); + println!("gid: {}", f.header().gid().unwrap()); + println!("mtime: {}", f.header().mtime().unwrap()); + println!("username: {:?}", f.header().username().unwrap()); + println!("groupname: {:?}", f.header().groupname().unwrap()); + + if f.header().as_ustar().is_some() { + println!("kind: UStar"); + } else if f.header().as_gnu().is_some() { + println!("kind: GNU"); + } else { + println!("kind: normal"); + } + + if let Ok(Some(extensions)) = f.pax_extensions().await { + println!("pax extensions:"); + for e in extensions { + let e = e.unwrap(); + println!( + "\t{:?} = {:?}", + String::from_utf8_lossy(e.key_bytes()), + String::from_utf8_lossy(e.value_bytes()) + ); + } + } + i += 1; + } + }); +} diff --git a/examples/write.rs b/examples/write.rs new file mode 100644 index 00000000..1fcc50ea --- /dev/null +++ b/examples/write.rs @@ -0,0 +1,16 @@ +extern crate tokio_tar as async_tar; + +use async_tar::Builder; +use tokio::fs::File; + +fn main() { + tokio::runtime::Runtime::new().unwrap().block_on(async { + let file = File::create("foo.tar").await.unwrap(); + let mut a = Builder::new(file); + + a.append_path("README.md").await.unwrap(); + a.append_file("lib.rs", &mut 
File::open("src/lib.rs").await.unwrap()) + .await + .unwrap(); + }); +} diff --git a/src/archive.rs b/src/archive.rs new file mode 100644 index 00000000..1e4d3b3b --- /dev/null +++ b/src/archive.rs @@ -0,0 +1,610 @@ +use std::{ + cmp, + path::Path, + pin::Pin, + sync::{ + atomic::{AtomicU64, Ordering}, + Arc, + }, + task::{Context, Poll}, +}; +use tokio::{ + io::{self, AsyncRead as Read, AsyncReadExt}, + sync::Mutex, +}; +use tokio_stream::*; + +use crate::{ + entry::{EntryFields, EntryIo}, + error::TarError, + other, Entry, GnuExtSparseHeader, GnuSparseHeader, Header, +}; + +/// A top-level representation of an archive file. +/// +/// This archive can have an entry added to it and it can be iterated over. +#[derive(Debug)] +pub struct Archive { + inner: Arc>, +} + +impl Clone for Archive { + fn clone(&self) -> Self { + Archive { + inner: self.inner.clone(), + } + } +} + +#[derive(Debug)] +pub struct ArchiveInner { + pos: AtomicU64, + unpack_xattrs: bool, + preserve_permissions: bool, + preserve_mtime: bool, + ignore_zeros: bool, + obj: Mutex, +} + +/// Configure the archive. +pub struct ArchiveBuilder { + obj: R, + unpack_xattrs: bool, + preserve_permissions: bool, + preserve_mtime: bool, + ignore_zeros: bool, +} + +impl ArchiveBuilder { + /// Create a new builder. + pub fn new(obj: R) -> Self { + ArchiveBuilder { + unpack_xattrs: false, + preserve_permissions: false, + preserve_mtime: true, + ignore_zeros: false, + obj, + } + } + + /// Indicate whether extended file attributes (xattrs on Unix) are preserved + /// when unpacking this archive. + /// + /// This flag is disabled by default and is currently only implemented on + /// Unix using xattr support. This may eventually be implemented for + /// Windows, however, if other archive implementations are found which do + /// this as well. 
+ pub fn set_unpack_xattrs(mut self, unpack_xattrs: bool) -> Self { + self.unpack_xattrs = unpack_xattrs; + self + } + + /// Indicate whether extended permissions (like suid on Unix) are preserved + /// when unpacking this entry. + /// + /// This flag is disabled by default and is currently only implemented on + /// Unix. + pub fn set_preserve_permissions(mut self, preserve: bool) -> Self { + self.preserve_permissions = preserve; + self + } + + /// Indicate whether access time information is preserved when unpacking + /// this entry. + /// + /// This flag is enabled by default. + pub fn set_preserve_mtime(mut self, preserve: bool) -> Self { + self.preserve_mtime = preserve; + self + } + + /// Ignore zeroed headers, which would otherwise indicate to the archive that it has no more + /// entries. + /// + /// This can be used in case multiple tar archives have been concatenated together. + pub fn set_ignore_zeros(mut self, ignore_zeros: bool) -> Self { + self.ignore_zeros = ignore_zeros; + self + } + + /// Construct the archive, ready to accept inputs. + pub fn build(self) -> Archive { + let Self { + unpack_xattrs, + preserve_permissions, + preserve_mtime, + ignore_zeros, + obj, + } = self; + + Archive { + inner: Arc::new(ArchiveInner { + unpack_xattrs, + preserve_permissions, + preserve_mtime, + ignore_zeros, + obj: Mutex::new(obj), + pos: 0.into(), + }), + } + } +} + +impl Archive { + /// Create a new archive with the underlying object as the reader. + pub fn new(obj: R) -> Archive { + Archive { + inner: Arc::new(ArchiveInner { + unpack_xattrs: false, + preserve_permissions: false, + preserve_mtime: true, + ignore_zeros: false, + obj: Mutex::new(obj), + pos: 0.into(), + }), + } + } + + /// Unwrap this archive, returning the underlying object. 
+ pub fn into_inner(self) -> Result { + let Self { inner } = self; + + match Arc::try_unwrap(inner) { + Ok(inner) => Ok(inner.obj.into_inner()), + Err(inner) => Err(Self { inner }), + } + } + + /// Construct a stream over the entries in this archive. + /// + /// Note that care must be taken to consider each entry within an archive in + /// sequence. If entries are processed out of sequence (from what the + /// stream returns), then the contents read for each entry may be + /// corrupted. + pub fn entries(&mut self) -> io::Result> { + if self.inner.pos.load(Ordering::SeqCst) != 0 { + return Err(other( + "cannot call entries unless archive is at \ + position 0", + )); + } + + Ok(Entries { + archive: self.clone(), + next: 0, + gnu_longlink: None, + gnu_longname: None, + pax_extensions: None, + }) + } + + /// Construct a stream over the raw entries in this archive. + /// + /// Note that care must be taken to consider each entry within an archive in + /// sequence. If entries are processed out of sequence (from what the + /// stream returns), then the contents read for each entry may be + /// corrupted. + pub fn entries_raw(&mut self) -> io::Result> { + if self.inner.pos.load(Ordering::SeqCst) != 0 { + return Err(other( + "cannot call entries_raw unless archive is at \ + position 0", + )); + } + + Ok(RawEntries { + archive: self.clone(), + next: 0, + }) + } + + /// Unpacks the contents of the tarball into the specified `dst`. + /// + /// This function will iterate over the entire contents of this tarball, + /// extracting each file in turn to the location specified by the entry's + /// path name. + /// + /// This operation is relatively sensitive in that it will not write files + /// outside of the path specified by `dst`. Files in the archive which have + /// a '..' in their path are skipped during the unpacking process. 
+ /// + /// # Examples + /// + /// ```no_run + /// # fn main() -> Result<(), Box> { tokio::runtime::Runtime::new().unwrap().block_on(async { + /// # + /// use tokio::fs::File; + /// use tokio_tar::Archive; + /// + /// let mut ar = Archive::new(File::open("foo.tar").await?); + /// ar.unpack("foo").await?; + /// # + /// # Ok(()) }) } + /// ``` + pub async fn unpack>(&mut self, dst: P) -> io::Result<()> { + let mut entries = self.entries()?; + let mut pinned = Pin::new(&mut entries); + while let Some(entry) = pinned.next().await { + let mut file = entry.map_err(|e| TarError::new("failed to iterate over archive", e))?; + file.unpack_in(dst.as_ref()).await?; + } + Ok(()) + } +} + +/// Stream of `Entry`s. +pub struct Entries { + archive: Archive, + next: u64, + gnu_longname: Option>, + gnu_longlink: Option>, + pax_extensions: Option>, +} + +macro_rules! ready_opt_err { + ($val:expr) => { + match futures_core::ready!($val) { + Some(Ok(val)) => val, + Some(Err(err)) => return Poll::Ready(Some(Err(err))), + None => return Poll::Ready(None), + } + }; +} + +macro_rules! 
ready_err { + ($val:expr) => { + match futures_core::ready!($val) { + Ok(val) => val, + Err(err) => return Poll::Ready(Some(Err(err))), + } + }; +} + +impl Stream for Entries { + type Item = io::Result>>; + + fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { + loop { + let entry = ready_opt_err!(poll_next_raw(self.archive.clone(), &mut self.next, cx)); + + if entry.header().as_gnu().is_some() && entry.header().entry_type().is_gnu_longname() { + if self.gnu_longname.is_some() { + return Poll::Ready(Some(Err(other( + "two long name entries describing \ + the same member", + )))); + } + + let mut ef = EntryFields::from(entry); + let val = ready_err!(Pin::new(&mut ef).poll_read_all(cx)); + self.gnu_longname = Some(val); + continue; + } + + if entry.header().as_gnu().is_some() && entry.header().entry_type().is_gnu_longlink() { + if self.gnu_longlink.is_some() { + return Poll::Ready(Some(Err(other( + "two long name entries describing \ + the same member", + )))); + } + let mut ef = EntryFields::from(entry); + let val = ready_err!(Pin::new(&mut ef).poll_read_all(cx)); + self.gnu_longlink = Some(val); + continue; + } + + if entry.header().as_ustar().is_some() + && entry.header().entry_type().is_pax_local_extensions() + { + if self.pax_extensions.is_some() { + return Poll::Ready(Some(Err(other( + "two pax extensions entries describing \ + the same member", + )))); + } + let mut ef = EntryFields::from(entry); + let val = ready_err!(Pin::new(&mut ef).poll_read_all(cx)); + self.pax_extensions = Some(val); + continue; + } + + let mut fields = EntryFields::from(entry); + fields.long_pathname = self.gnu_longname.take(); + fields.long_linkname = self.gnu_longlink.take(); + fields.pax_extensions = self.pax_extensions.take(); + + ready_err!(poll_parse_sparse_header( + self.archive.clone(), + &mut self.next, + &mut fields, + cx + )); + + return Poll::Ready(Some(Ok(fields.into_entry()))); + } + } +} + +/// Stream of raw `Entry`s. 
+pub struct RawEntries { + archive: Archive, + next: u64, +} + +impl Stream for RawEntries { + type Item = io::Result>>; + + fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { + poll_next_raw(self.archive.clone(), &mut self.next, cx) + } +} + +fn poll_next_raw( + mut archive: Archive, + next: &mut u64, + cx: &mut Context<'_>, +) -> Poll>>>> { + let mut header = Header::new_old(); + let mut header_pos = *next; + + loop { + // Seek to the start of the next header in the archive + let delta = *next - archive.inner.pos.load(Ordering::SeqCst); + + match futures_core::ready!(poll_skip(&mut archive, cx, delta)) { + Ok(_) => {} + Err(err) => return Poll::Ready(Some(Err(err))), + } + + // EOF is an indicator that we are at the end of the archive. + match futures_core::ready!(poll_try_read_all(&mut archive, cx, header.as_mut_bytes())) { + Ok(true) => {} + Ok(false) => return Poll::Ready(None), + Err(err) => return Poll::Ready(Some(Err(err))), + } + + // If a header is not all zeros, we have another valid header. + // Otherwise, check if we are ignoring zeros and continue, or break as if this is the + // end of the archive. 
+ if !header.as_bytes().iter().all(|i| *i == 0) { + *next += 512; + break; + } + + if !archive.inner.ignore_zeros { + return Poll::Ready(None); + } + + *next += 512; + header_pos = *next; + } + + // Make sure the checksum is ok + let sum = header.as_bytes()[..148] + .iter() + .chain(&header.as_bytes()[156..]) + .fold(0, |a, b| a + (*b as u32)) + + 8 * 32; + let cksum = header.cksum()?; + if sum != cksum { + return Poll::Ready(Some(Err(other("archive header checksum mismatch")))); + } + + let file_pos = *next; + let size = header.entry_size()?; + + let data = EntryIo::Data(archive.clone().take(size)); + + let ret = EntryFields { + size, + header_pos, + file_pos, + data: vec![data], + header, + long_pathname: None, + long_linkname: None, + pax_extensions: None, + unpack_xattrs: archive.inner.unpack_xattrs, + preserve_permissions: archive.inner.preserve_permissions, + preserve_mtime: archive.inner.preserve_mtime, + read_state: None, + }; + + // Store where the next entry is, rounding up by 512 bytes (the size of + // a header); + let size = (size + 511) & !(512 - 1); + *next += size; + + Poll::Ready(Some(Ok(ret.into_entry()))) +} + +fn poll_parse_sparse_header( + mut archive: Archive, + next: &mut u64, + entry: &mut EntryFields>, + cx: &mut Context<'_>, +) -> Poll> { + if !entry.header.entry_type().is_gnu_sparse() { + return Poll::Ready(Ok(())); + } + + let gnu = match entry.header.as_gnu() { + Some(gnu) => gnu, + None => return Poll::Ready(Err(other("sparse entry type listed but not GNU header"))), + }; + + // Sparse files are represented internally as a list of blocks that are + // read. Blocks are either a bunch of 0's or they're data from the + // underlying archive. + // + // Blocks of a sparse file are described by the `GnuSparseHeader` + // structure, some of which are contained in `GnuHeader` but some of + // which may also be contained after the first header in further + // headers. 
+ // + // We read off all the blocks here and use the `add_block` function to + // incrementally add them to the list of I/O block (in `entry.data`). + // The `add_block` function also validates that each chunk comes after + // the previous, we don't overrun the end of the file, and each block is + // aligned to a 512-byte boundary in the archive itself. + // + // At the end we verify that the sparse file size (`Header::size`) is + // the same as the current offset (described by the list of blocks) as + // well as the amount of data read equals the size of the entry + // (`Header::entry_size`). + entry.data.truncate(0); + + let mut cur = 0; + let mut remaining = entry.size; + { + let data = &mut entry.data; + let reader = archive.clone(); + let size = entry.size; + let mut add_block = |block: &GnuSparseHeader| -> io::Result<_> { + if block.is_empty() { + return Ok(()); + } + let off = block.offset()?; + let len = block.length()?; + + if (size - remaining) % 512 != 0 { + return Err(other( + "previous block in sparse file was not \ + aligned to 512-byte boundary", + )); + } else if off < cur { + return Err(other( + "out of order or overlapping sparse \ + blocks", + )); + } else if cur < off { + let block = io::repeat(0).take(off - cur); + data.push(EntryIo::Pad(block)); + } + cur = off + .checked_add(len) + .ok_or_else(|| other("more bytes listed in sparse file than u64 can hold"))?; + remaining = remaining.checked_sub(len).ok_or_else(|| { + other( + "sparse file consumed more data than the header \ + listed", + ) + })?; + data.push(EntryIo::Data(reader.clone().take(len))); + Ok(()) + }; + for block in gnu.sparse.iter() { + add_block(block)? 
+ } + if gnu.is_extended() { + let mut ext = GnuExtSparseHeader::new(); + ext.isextended[0] = 1; + while ext.is_extended() { + match futures_core::ready!(poll_try_read_all(&mut archive, cx, ext.as_mut_bytes())) + { + Ok(true) => {} + Ok(false) => return Poll::Ready(Err(other("failed to read extension"))), + Err(err) => return Poll::Ready(Err(err)), + } + + *next += 512; + for block in ext.sparse.iter() { + add_block(block)?; + } + } + } + } + if cur != gnu.real_size()? { + return Poll::Ready(Err(other( + "mismatch in sparse file chunks and \ + size in header", + ))); + } + entry.size = cur; + if remaining > 0 { + return Poll::Ready(Err(other( + "mismatch in sparse file chunks and \ + entry size in header", + ))); + } + + Poll::Ready(Ok(())) +} + +impl Read for Archive { + fn poll_read( + self: Pin<&mut Self>, + cx: &mut Context<'_>, + into: &mut io::ReadBuf<'_>, + ) -> Poll> { + let mut r = if let Ok(v) = self.inner.obj.try_lock() { + v + } else { + return Poll::Pending; + }; + + let res = futures_core::ready!(Pin::new(&mut *r).poll_read(cx, into)); + match res { + Ok(()) => { + self.inner + .pos + .fetch_add(into.filled().len() as u64, Ordering::SeqCst); + Poll::Ready(Ok(())) + } + Err(err) => Poll::Ready(Err(err)), + } + } +} + +/// Try to fill the buffer from the reader. +/// +/// If the reader reaches its end before filling the buffer at all, returns `false`. +/// Otherwise returns `true`. 
+fn poll_try_read_all( + mut source: R, + cx: &mut Context<'_>, + buf: &mut [u8], +) -> Poll> { + let mut read = 0; + while read < buf.len() { + let mut read_buf = io::ReadBuf::new(&mut buf[read..]); + match futures_core::ready!(Pin::new(&mut source).poll_read(cx, &mut read_buf)) { + Ok(()) if read_buf.filled().is_empty() => { + if read == 0 { + return Poll::Ready(Ok(false)); + } + + return Poll::Ready(Err(other("failed to read entire block"))); + } + Ok(()) => read += read_buf.filled().len(), + Err(err) => return Poll::Ready(Err(err)), + } + } + + Poll::Ready(Ok(true)) +} + +/// Skip n bytes on the given source. +fn poll_skip( + mut source: R, + cx: &mut Context<'_>, + mut amt: u64, +) -> Poll> { + let mut buf = [0u8; 4096 * 8]; + while amt > 0 { + let n = cmp::min(amt, buf.len() as u64); + let mut read_buf = io::ReadBuf::new(&mut buf[..n as usize]); + match futures_core::ready!(Pin::new(&mut source).poll_read(cx, &mut read_buf)) { + Ok(()) if read_buf.filled().is_empty() => { + return Poll::Ready(Err(other("unexpected EOF during skip"))); + } + Ok(()) => { + amt -= read_buf.filled().len() as u64; + } + Err(err) => return Poll::Ready(Err(err)), + } + } + + Poll::Ready(Ok(())) +} diff --git a/src/builder.rs b/src/builder.rs new file mode 100644 index 00000000..08c46ba0 --- /dev/null +++ b/src/builder.rs @@ -0,0 +1,633 @@ +use crate::{ + header::{bytes2path, path2bytes, HeaderMode}, + other, EntryType, Header, +}; +use std::{borrow::Cow, fs::Metadata, path::Path}; +use tokio::{ + fs, + io::{self, AsyncRead as Read, AsyncReadExt, AsyncWrite as Write, AsyncWriteExt}, +}; + +/// A structure for building archives +/// +/// This structure has methods for building up an archive from scratch into any +/// arbitrary writer. +pub struct Builder { + mode: HeaderMode, + follow: bool, + finished: bool, + obj: Option, + cancellation: Option>, +} + +impl Builder { + /// Create a new archive builder with the underlying object as the + /// destination of all data written. 
The builder will use + /// `HeaderMode::Complete` by default. + pub fn new(obj: W) -> Builder { + let (tx, rx) = tokio::sync::oneshot::channel::(); + tokio::spawn(async move { + if let Ok(mut w) = rx.await { + let _ = w.write_all(&[0; 1024]).await; + } + }); + Builder { + mode: HeaderMode::Complete, + follow: true, + finished: false, + obj: Some(obj), + cancellation: Some(tx), + } + } + + /// Changes the HeaderMode that will be used when reading fs Metadata for + /// methods that implicitly read metadata for an input Path. Notably, this + /// does _not_ apply to `append(Header)`. + pub fn mode(&mut self, mode: HeaderMode) { + self.mode = mode; + } + + /// Follow symlinks, archiving the contents of the file they point to rather + /// than adding a symlink to the archive. Defaults to true. + pub fn follow_symlinks(&mut self, follow: bool) { + self.follow = follow; + } + + /// Gets shared reference to the underlying object. + pub fn get_ref(&self) -> &W { + self.obj.as_ref().unwrap() + } + + /// Gets mutable reference to the underlying object. + /// + /// Note that care must be taken while writing to the underlying + /// object. But, e.g. `get_mut().flush()` is claimed to be safe and + /// useful in the situations when one needs to be ensured that + /// tar entry was flushed to the disk. + pub fn get_mut(&mut self) -> &mut W { + self.obj.as_mut().unwrap() + } + + /// Unwrap this archive, returning the underlying object. + /// + /// This function will finish writing the archive if the `finish` function + /// hasn't yet been called, returning any I/O error which happens during + /// that operation. + pub async fn into_inner(mut self) -> io::Result { + if !self.finished { + self.finish().await?; + } + Ok(self.obj.take().unwrap()) + } + + /// Adds a new entry to this archive. + /// + /// This function will append the header specified, followed by contents of + /// the stream specified by `data`. 
To produce a valid archive the `size` + /// field of `header` must be the same as the length of the stream that's + /// being written. Additionally the checksum for the header should have been + /// set via the `set_cksum` method. + /// + /// Note that this will not attempt to seek the archive to a valid position, + /// so if the archive is in the middle of a read or some other similar + /// operation then this may corrupt the archive. + /// + /// Also note that after all entries have been written to an archive the + /// `finish` function needs to be called to finish writing the archive. + /// + /// # Errors + /// + /// This function will return an error for any intermittent I/O error which + /// occurs when either reading or writing. + /// + /// # Examples + /// + /// ``` + /// # fn main() -> Result<(), Box> { tokio::runtime::Runtime::new().unwrap().block_on(async { + /// # + /// use tokio_tar::{Builder, Header}; + /// + /// let mut header = Header::new_gnu(); + /// header.set_path("foo")?; + /// header.set_size(4); + /// header.set_cksum(); + /// + /// let mut data: &[u8] = &[1, 2, 3, 4]; + /// + /// let mut ar = Builder::new(Vec::new()); + /// ar.append(&header, data).await?; + /// let data = ar.into_inner().await?; + /// # + /// # Ok(()) }) } + /// ``` + pub async fn append( + &mut self, + header: &Header, + mut data: R, + ) -> io::Result<()> { + append(self.get_mut(), header, &mut data).await?; + + Ok(()) + } + + /// Adds a new entry to this archive with the specified path. + /// + /// This function will set the specified path in the given header, which may + /// require appending a GNU long-name extension entry to the archive first. + /// The checksum for the header will be automatically updated via the + /// `set_cksum` method after setting the path. No other metadata in the + /// header will be modified. + /// + /// Then it will append the header, followed by contents of the stream + /// specified by `data`. 
To produce a valid archive the `size` field of + /// `header` must be the same as the length of the stream that's being + /// written. + /// + /// Note that this will not attempt to seek the archive to a valid position, + /// so if the archive is in the middle of a read or some other similar + /// operation then this may corrupt the archive. + /// + /// Also note that after all entries have been written to an archive the + /// `finish` function needs to be called to finish writing the archive. + /// + /// # Errors + /// + /// This function will return an error for any intermittent I/O error which + /// occurs when either reading or writing. + /// + /// # Examples + /// + /// ``` + /// # fn main() -> Result<(), Box> { tokio::runtime::Runtime::new().unwrap().block_on(async { + /// # + /// use tokio_tar::{Builder, Header}; + /// + /// let mut header = Header::new_gnu(); + /// header.set_size(4); + /// header.set_cksum(); + /// + /// let mut data: &[u8] = &[1, 2, 3, 4]; + /// + /// let mut ar = Builder::new(Vec::new()); + /// ar.append_data(&mut header, "really/long/path/to/foo", data).await?; + /// let data = ar.into_inner().await?; + /// # + /// # Ok(()) }) } + /// ``` + pub async fn append_data, R: Read + Unpin>( + &mut self, + header: &mut Header, + path: P, + data: R, + ) -> io::Result<()> { + prepare_header_path(self.get_mut(), header, path.as_ref()).await?; + header.set_cksum(); + self.append(&header, data).await?; + + Ok(()) + } + + /// Adds a file on the local filesystem to this archive. + /// + /// This function will open the file specified by `path` and insert the file + /// into the archive with the appropriate metadata set, returning any I/O + /// error which occurs while writing. The path name for the file inside of + /// this archive will be the same as `path`, and it is required that the + /// path is a relative path. 
+ /// + /// Note that this will not attempt to seek the archive to a valid position, + /// so if the archive is in the middle of a read or some other similar + /// operation then this may corrupt the archive. + /// + /// Also note that after all files have been written to an archive the + /// `finish` function needs to be called to finish writing the archive. + /// + /// # Examples + /// + /// ```no_run + /// # fn main() -> Result<(), Box> { tokio::runtime::Runtime::new().unwrap().block_on(async { + /// # + /// use tokio_tar::Builder; + /// + /// let mut ar = Builder::new(Vec::new()); + /// + /// ar.append_path("foo/bar.txt").await?; + /// # + /// # Ok(()) }) } + /// ``` + pub async fn append_path>(&mut self, path: P) -> io::Result<()> { + let mode = self.mode; + let follow = self.follow; + append_path_with_name(self.get_mut(), path.as_ref(), None, mode, follow).await?; + Ok(()) + } + + /// Adds a file on the local filesystem to this archive under another name. + /// + /// This function will open the file specified by `path` and insert the file + /// into the archive as `name` with appropriate metadata set, returning any + /// I/O error which occurs while writing. The path name for the file inside + /// of this archive will be `name` is required to be a relative path. + /// + /// Note that this will not attempt to seek the archive to a valid position, + /// so if the archive is in the middle of a read or some other similar + /// operation then this may corrupt the archive. + /// + /// Also note that after all files have been written to an archive the + /// `finish` function needs to be called to finish writing the archive. + /// + /// # Examples + /// + /// ```no_run + /// # fn main() -> Result<(), Box> { tokio::runtime::Runtime::new().unwrap().block_on(async { + /// # + /// use tokio_tar::Builder; + /// + /// let mut ar = Builder::new(Vec::new()); + /// + /// // Insert the local file "foo/bar.txt" in the archive but with the name + /// // "bar/foo.txt". 
+ /// ar.append_path_with_name("foo/bar.txt", "bar/foo.txt").await?; + /// # + /// # Ok(()) }) } + /// ``` + pub async fn append_path_with_name, N: AsRef>( + &mut self, + path: P, + name: N, + ) -> io::Result<()> { + let mode = self.mode; + let follow = self.follow; + append_path_with_name( + self.get_mut(), + path.as_ref(), + Some(name.as_ref()), + mode, + follow, + ) + .await?; + Ok(()) + } + + /// Adds a file to this archive with the given path as the name of the file + /// in the archive. + /// + /// This will use the metadata of `file` to populate a `Header`, and it will + /// then append the file to the archive with the name `path`. + /// + /// Note that this will not attempt to seek the archive to a valid position, + /// so if the archive is in the middle of a read or some other similar + /// operation then this may corrupt the archive. + /// + /// Also note that after all files have been written to an archive the + /// `finish` function needs to be called to finish writing the archive. + /// + /// # Examples + /// + /// ```no_run + /// # fn main() -> Result<(), Box> { tokio::runtime::Runtime::new().unwrap().block_on(async { + /// # + /// use tokio::fs::File; + /// use tokio_tar::Builder; + /// + /// let mut ar = Builder::new(Vec::new()); + /// + /// // Open the file at one location, but insert it into the archive with a + /// // different name. + /// let mut f = File::open("foo/bar/baz.txt").await?; + /// ar.append_file("bar/baz.txt", &mut f).await?; + /// # + /// # Ok(()) }) } + /// ``` + pub async fn append_file>( + &mut self, + path: P, + file: &mut fs::File, + ) -> io::Result<()> { + let mode = self.mode; + append_file(self.get_mut(), path.as_ref(), file, mode).await?; + Ok(()) + } + + /// Adds a directory to this archive with the given path as the name of the + /// directory in the archive. + /// + /// This will use `stat` to populate a `Header`, and it will then append the + /// directory to the archive with the name `path`. 
+ /// + /// Note that this will not attempt to seek the archive to a valid position, + /// so if the archive is in the middle of a read or some other similar + /// operation then this may corrupt the archive. + /// + /// Also note that after all files have been written to an archive the + /// `finish` function needs to be called to finish writing the archive. + /// + /// # Examples + /// + /// ``` + /// # fn main() -> Result<(), Box> { tokio::runtime::Runtime::new().unwrap().block_on(async { + /// # + /// use tokio::fs; + /// use tokio_tar::Builder; + /// + /// let mut ar = Builder::new(Vec::new()); + /// + /// // Use the directory at one location, but insert it into the archive + /// // with a different name. + /// ar.append_dir("bardir", ".").await?; + /// # + /// # Ok(()) }) } + /// ``` + pub async fn append_dir(&mut self, path: P, src_path: Q) -> io::Result<()> + where + P: AsRef, + Q: AsRef, + { + let mode = self.mode; + append_dir(self.get_mut(), path.as_ref(), src_path.as_ref(), mode).await?; + Ok(()) + } + + /// Adds a directory and all of its contents (recursively) to this archive + /// with the given path as the name of the directory in the archive. + /// + /// Note that this will not attempt to seek the archive to a valid position, + /// so if the archive is in the middle of a read or some other similar + /// operation then this may corrupt the archive. + /// + /// Also note that after all files have been written to an archive the + /// `finish` function needs to be called to finish writing the archive. + /// + /// # Examples + /// + /// ``` + /// # fn main() -> Result<(), Box> { tokio::runtime::Runtime::new().unwrap().block_on(async { + /// # + /// use tokio::fs; + /// use tokio_tar::Builder; + /// + /// let mut ar = Builder::new(Vec::new()); + /// + /// // Use the directory at one location, but insert it into the archive + /// // with a different name. 
+ /// ar.append_dir_all("bardir", ".").await?; + /// # + /// # Ok(()) }) } + /// ``` + pub async fn append_dir_all(&mut self, path: P, src_path: Q) -> io::Result<()> + where + P: AsRef, + Q: AsRef, + { + let mode = self.mode; + let follow = self.follow; + append_dir_all( + self.get_mut(), + path.as_ref(), + src_path.as_ref(), + mode, + follow, + ) + .await?; + Ok(()) + } + + /// Finish writing this archive, emitting the termination sections. + /// + /// This function should only be called when the archive has been written + /// entirely and if an I/O error happens the underlying object still needs + /// to be acquired. + /// + /// In most situations the `into_inner` method should be preferred. + pub async fn finish(&mut self) -> io::Result<()> { + if self.finished { + return Ok(()); + } + self.finished = true; + self.get_mut().write_all(&[0; 1024]).await?; + Ok(()) + } +} + +async fn append( + mut dst: &mut Dst, + header: &Header, + mut data: &mut Data, +) -> io::Result<()> { + dst.write_all(header.as_bytes()).await?; + let len = io::copy(&mut data, &mut dst).await?; + + // Pad with zeros if necessary. + let buf = [0; 512]; + let remaining = 512 - (len % 512); + if remaining < 512 { + dst.write_all(&buf[..remaining as usize]).await?; + } + + Ok(()) +} + +async fn append_path_with_name( + dst: &mut Dst, + path: &Path, + name: Option<&Path>, + mode: HeaderMode, + follow: bool, +) -> io::Result<()> { + let stat = if follow { + fs::metadata(path).await.map_err(|err| { + io::Error::new( + err.kind(), + format!("{} when getting metadata for {}", err, path.display()), + ) + })? + } else { + fs::symlink_metadata(path).await.map_err(|err| { + io::Error::new( + err.kind(), + format!("{} when getting metadata for {}", err, path.display()), + ) + })? 
+ }; + let ar_name = name.unwrap_or(path); + if stat.is_file() { + append_fs( + dst, + ar_name, + &stat, + &mut fs::File::open(path).await?, + mode, + None, + ) + .await?; + Ok(()) + } else if stat.is_dir() { + append_fs(dst, ar_name, &stat, &mut io::empty(), mode, None).await?; + Ok(()) + } else if stat.file_type().is_symlink() { + let link_name = fs::read_link(path).await?; + append_fs( + dst, + ar_name, + &stat, + &mut io::empty(), + mode, + Some(&link_name), + ) + .await?; + Ok(()) + } else { + Err(other(&format!("{} has unknown file type", path.display()))) + } +} + +async fn append_file( + dst: &mut Dst, + path: &Path, + file: &mut fs::File, + mode: HeaderMode, +) -> io::Result<()> { + let stat = file.metadata().await?; + append_fs(dst, path, &stat, file, mode, None).await?; + Ok(()) +} + +async fn append_dir( + dst: &mut Dst, + path: &Path, + src_path: &Path, + mode: HeaderMode, +) -> io::Result<()> { + let stat = fs::metadata(src_path).await?; + append_fs(dst, path, &stat, &mut io::empty(), mode, None).await?; + Ok(()) +} + +fn prepare_header(size: u64, entry_type: EntryType) -> Header { + let mut header = Header::new_gnu(); + let name = b"././@LongLink"; + header.as_gnu_mut().unwrap().name[..name.len()].clone_from_slice(&name[..]); + header.set_mode(0o644); + header.set_uid(0); + header.set_gid(0); + header.set_mtime(0); + // + 1 to be compliant with GNU tar + header.set_size(size + 1); + header.set_entry_type(entry_type); + header.set_cksum(); + header +} + +async fn prepare_header_path( + dst: &mut Dst, + header: &mut Header, + path: &Path, +) -> io::Result<()> { + // Try to encode the path directly in the header, but if it ends up not + // working (probably because it's too long) then try to use the GNU-specific + // long name extension by emitting an entry which indicates that it's the + // filename. 
+ if let Err(e) = header.set_path(path) { + let data = path2bytes(&path)?; + let max = header.as_old().name.len(); + // Since e isn't specific enough to let us know the path is indeed too + // long, verify it first before using the extension. + if data.len() < max { + return Err(e); + } + let header2 = prepare_header(data.len() as u64, EntryType::GNULongName); + // null-terminated string + let mut data2 = data.chain(io::repeat(0).take(1)); + append(dst, &header2, &mut data2).await?; + // Truncate the path to store in the header we're about to emit to + // ensure we've got something at least mentioned. + let path = bytes2path(Cow::Borrowed(&data[..max]))?; + header.set_path(&path)?; + } + Ok(()) +} + +async fn prepare_header_link( + dst: &mut Dst, + header: &mut Header, + link_name: &Path, +) -> io::Result<()> { + // Same as previous function but for linkname + if let Err(e) = header.set_link_name(&link_name) { + let data = path2bytes(&link_name)?; + if data.len() < header.as_old().linkname.len() { + return Err(e); + } + let header2 = prepare_header(data.len() as u64, EntryType::GNULongLink); + let mut data2 = data.chain(io::repeat(0).take(1)); + append(dst, &header2, &mut data2).await?; + } + Ok(()) +} + +async fn append_fs( + dst: &mut Dst, + path: &Path, + meta: &Metadata, + read: &mut R, + mode: HeaderMode, + link_name: Option<&Path>, +) -> io::Result<()> { + let mut header = Header::new_gnu(); + + prepare_header_path(dst, &mut header, path).await?; + header.set_metadata_in_mode(meta, mode); + if let Some(link_name) = link_name { + prepare_header_link(dst, &mut header, link_name).await?; + } + header.set_cksum(); + append(dst, &header, read).await?; + + Ok(()) +} + +async fn append_dir_all( + dst: &mut Dst, + path: &Path, + src_path: &Path, + mode: HeaderMode, + follow: bool, +) -> io::Result<()> { + let mut stack = vec![(src_path.to_path_buf(), true, false)]; + while let Some((src, is_dir, is_symlink)) = stack.pop() { + let dest = 
path.join(src.strip_prefix(&src_path).unwrap()); + + // In case of a symlink pointing to a directory, is_dir is false, but src.is_dir() will return true + if is_dir || (is_symlink && follow && src.is_dir()) { + let mut entries = fs::read_dir(&src).await?; + while let Some(entry) = entries.next_entry().await.transpose() { + let entry = entry?; + let file_type = entry.file_type().await?; + stack.push((entry.path(), file_type.is_dir(), file_type.is_symlink())); + } + if dest != Path::new("") { + append_dir(dst, &dest, &src, mode).await?; + } + } else if !follow && is_symlink { + let stat = fs::symlink_metadata(&src).await?; + let link_name = fs::read_link(&src).await?; + append_fs(dst, &dest, &stat, &mut io::empty(), mode, Some(&link_name)).await?; + } else { + append_file(dst, &dest, &mut fs::File::open(src).await?, mode).await?; + } + } + Ok(()) +} + +impl Drop for Builder { + fn drop(&mut self) { + // TODO: proper async cancellation + if !self.finished { + let _ = self + .cancellation + .take() + .unwrap() + .send(self.obj.take().unwrap()); + } + } +} diff --git a/src/entry.rs b/src/entry.rs new file mode 100644 index 00000000..e239799b --- /dev/null +++ b/src/entry.rs @@ -0,0 +1,955 @@ +use crate::{ + error::TarError, header::bytes2path, other, pax::pax_extensions, Archive, Header, PaxExtensions, +}; +use filetime::{self, FileTime}; +use std::{ + borrow::Cow, + cmp, fmt, + io::{Error, ErrorKind, SeekFrom}, + marker, + path::{Component, Path, PathBuf}, + pin::Pin, + task::{Context, Poll}, +}; +use tokio::{ + fs, + fs::OpenOptions, + io::{self, AsyncRead as Read, AsyncReadExt, AsyncSeekExt}, +}; + +/// A read-only view into an entry of an archive. +/// +/// This structure is a window into a portion of a borrowed archive which can +/// be inspected. It acts as a file handle by implementing the Reader trait. An +/// entry cannot be rewritten once inserted into an archive. 
+pub struct Entry { + fields: EntryFields, + _ignored: marker::PhantomData>, +} + +impl fmt::Debug for Entry { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("Entry") + .field("fields", &self.fields) + .finish() + } +} + +// private implementation detail of `Entry`, but concrete (no type parameters) +// and also all-public to be constructed from other modules. +pub struct EntryFields { + pub long_pathname: Option>, + pub long_linkname: Option>, + pub pax_extensions: Option>, + pub header: Header, + pub size: u64, + pub header_pos: u64, + pub file_pos: u64, + pub data: Vec>, + pub unpack_xattrs: bool, + pub preserve_permissions: bool, + pub preserve_mtime: bool, + pub(crate) read_state: Option>, +} + +impl fmt::Debug for EntryFields { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("EntryFields") + .field("long_pathname", &self.long_pathname) + .field("long_linkname", &self.long_linkname) + .field("pax_extensions", &self.pax_extensions) + .field("header", &self.header) + .field("size", &self.size) + .field("header_pos", &self.header_pos) + .field("file_pos", &self.file_pos) + .field("data", &self.data) + .field("unpack_xattrs", &self.unpack_xattrs) + .field("preserve_permissions", &self.preserve_permissions) + .field("preserve_mtime", &self.preserve_mtime) + .field("read_state", &self.read_state) + .finish() + } +} + +pub enum EntryIo { + Pad(io::Take), + Data(io::Take), +} + +impl fmt::Debug for EntryIo { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + EntryIo::Pad(_) => write!(f, "EntryIo::Pad"), + EntryIo::Data(_) => write!(f, "EntryIo::Data"), + } + } +} + +/// When unpacking items the unpacked thing is returned to allow custom +/// additional handling by users. Today the File is returned, in future +/// the enum may be extended with kinds for links, directories etc. +#[derive(Debug)] +#[non_exhaustive] +pub enum Unpacked { + /// A file was unpacked. 
+ File(fs::File), + /// A directory, hardlink, symlink, or other node was unpacked. + Other, +} + +impl Entry { + /// Returns the path name for this entry. + /// + /// This method may fail if the pathname is not valid Unicode and this is + /// called on a Windows platform. + /// + /// Note that this function will convert any `\` characters to directory + /// separators, and it will not always return the same value as + /// `self.header().path()` as some archive formats have support for longer + /// path names described in separate entries. + /// + /// It is recommended to use this method instead of inspecting the `header` + /// directly to ensure that various archive formats are handled correctly. + pub fn path(&self) -> io::Result> { + self.fields.path() + } + + /// Returns the raw bytes listed for this entry. + /// + /// Note that this function will convert any `\` characters to directory + /// separators, and it will not always return the same value as + /// `self.header().path_bytes()` as some archive formats have support for + /// longer path names described in separate entries. + pub fn path_bytes(&self) -> Cow<[u8]> { + self.fields.path_bytes() + } + + /// Returns the link name for this entry, if any is found. + /// + /// This method may fail if the pathname is not valid Unicode and this is + /// called on a Windows platform. `Ok(None)` being returned, however, + /// indicates that the link name was not present. + /// + /// Note that this function will convert any `\` characters to directory + /// separators, and it will not always return the same value as + /// `self.header().link_name()` as some archive formats have support for + /// longer path names described in separate entries. + /// + /// It is recommended to use this method instead of inspecting the `header` + /// directly to ensure that various archive formats are handled correctly. 
+ pub fn link_name(&self) -> io::Result>> { + self.fields.link_name() + } + + /// Returns the link name for this entry, in bytes, if listed. + /// + /// Note that this will not always return the same value as + /// `self.header().link_name_bytes()` as some archive formats have support for + /// longer path names described in separate entries. + pub fn link_name_bytes(&self) -> Option> { + self.fields.link_name_bytes() + } + + /// Returns an iterator over the pax extensions contained in this entry. + /// + /// Pax extensions are a form of archive where extra metadata is stored in + /// key/value pairs in entries before the entry they're intended to + /// describe. For example this can be used to describe long file name or + /// other metadata like atime/ctime/mtime in more precision. + /// + /// The returned iterator will yield key/value pairs for each extension. + /// + /// `None` will be returned if this entry does not indicate that it itself + /// contains extensions, or if there were no previous extensions describing + /// it. + /// + /// Note that global pax extensions are intended to be applied to all + /// archive entries. + /// + /// Also note that this function will read the entire entry if the entry + /// itself is a list of extensions. + pub async fn pax_extensions(&mut self) -> io::Result>> { + self.fields.pax_extensions().await + } + + /// Returns access to the header of this entry in the archive. + /// + /// This provides access to the metadata for this entry in the archive. + pub fn header(&self) -> &Header { + &self.fields.header + } + + /// Returns the starting position, in bytes, of the header of this entry in + /// the archive. + /// + /// The header is always a contiguous section of 512 bytes, so if the + /// underlying reader implements `Seek`, then the slice from `header_pos` to + /// `header_pos + 512` contains the raw header bytes. 
+ pub fn raw_header_position(&self) -> u64 { + self.fields.header_pos + } + + /// Returns the starting position, in bytes, of the file of this entry in + /// the archive. + /// + /// If the file of this entry is continuous (e.g. not a sparse file), and + /// if the underlying reader implements `Seek`, then the slice from + /// `file_pos` to `file_pos + entry_size` contains the raw file bytes. + pub fn raw_file_position(&self) -> u64 { + self.fields.file_pos + } + + /// Writes this file to the specified location. + /// + /// This function will write the entire contents of this file into the + /// location specified by `dst`. Metadata will also be propagated to the + /// path `dst`. + /// + /// This function will create a file at the path `dst`, and it is required + /// that the intermediate directories are created. Any existing file at the + /// location `dst` will be overwritten. + /// + /// > **Note**: This function does not have as many sanity checks as + /// > `Archive::unpack` or `Entry::unpack_in`. As a result if you're + /// > thinking of unpacking untrusted tarballs you may want to review the + /// > implementations of the previous two functions and perhaps implement + /// > similar logic yourself. + /// + /// # Examples + /// + /// ```no_run + /// # fn main() -> Result<(), Box> { tokio::runtime::Runtime::new().unwrap().block_on(async { + /// # + /// use tokio::fs::File; + /// use tokio_tar::Archive; + /// use tokio_stream::*; + /// + /// let mut ar = Archive::new(File::open("foo.tar").await?); + /// let mut entries = ar.entries()?; + /// let mut i = 0; + /// while let Some(file) = entries.next().await { + /// let mut file = file?; + /// file.unpack(format!("file-{}", i)).await?; + /// i += 1; + /// } + /// # + /// # Ok(()) }) } + /// ``` + pub async fn unpack>(&mut self, dst: P) -> io::Result { + self.fields.unpack(None, dst.as_ref()).await + } + + /// Extracts this file under the specified path, avoiding security issues. 
+ /// + /// This function will write the entire contents of this file into the + /// location obtained by appending the path of this file in the archive to + /// `dst`, creating any intermediate directories if needed. Metadata will + /// also be propagated to the path `dst`. Any existing file at the location + /// `dst` will be overwritten. + /// + /// This function carefully avoids writing outside of `dst`. If the file has + /// a '..' in its path, this function will skip it and return false. + /// + /// # Examples + /// + /// ```no_run + /// # fn main() -> Result<(), Box> { tokio::runtime::Runtime::new().unwrap().block_on(async { + /// # + /// use tokio::{fs::File, stream::*}; + /// use tokio_tar::Archive; + /// use tokio_stream::*; + /// + /// let mut ar = Archive::new(File::open("foo.tar").await?); + /// let mut entries = ar.entries()?; + /// let mut i = 0; + /// while let Some(file) = entries.next().await { + /// let mut file = file.unwrap(); + /// file.unpack_in("target").await?; + /// i += 1; + /// } + /// # + /// # Ok(()) }) } + /// ``` + pub async fn unpack_in>(&mut self, dst: P) -> io::Result { + self.fields.unpack_in(dst.as_ref()).await + } + + /// Indicate whether extended file attributes (xattrs on Unix) are preserved + /// when unpacking this entry. + /// + /// This flag is disabled by default and is currently only implemented on + /// Unix using xattr support. This may eventually be implemented for + /// Windows, however, if other archive implementations are found which do + /// this as well. + pub fn set_unpack_xattrs(&mut self, unpack_xattrs: bool) { + self.fields.unpack_xattrs = unpack_xattrs; + } + + /// Indicate whether extended permissions (like suid on Unix) are preserved + /// when unpacking this entry. + /// + /// This flag is disabled by default and is currently only implemented on + /// Unix. 
+ pub fn set_preserve_permissions(&mut self, preserve: bool) { + self.fields.preserve_permissions = preserve; + } + + /// Indicate whether the modification time (mtime) stored in the header is + /// applied to the file when unpacking this entry. + /// + /// This flag is enabled by default. + pub fn set_preserve_mtime(&mut self, preserve: bool) { + self.fields.preserve_mtime = preserve; + } +} + +impl Read for Entry { + fn poll_read( + mut self: Pin<&mut Self>, + cx: &mut Context<'_>, + into: &mut io::ReadBuf<'_>, + ) -> Poll> { + Pin::new(&mut self.as_mut().fields).poll_read(cx, into) + } +} + +impl EntryFields { + pub fn from(entry: Entry) -> Self { + entry.fields + } + + pub fn into_entry(self) -> Entry { + Entry { + fields: self, + _ignored: marker::PhantomData, + } + } + + pub(crate) fn poll_read_all( + self: Pin<&mut Self>, + cx: &mut Context<'_>, + ) -> Poll>> { + // Preallocate some data but don't let ourselves get too crazy now. + let cap = cmp::min(self.size, 128 * 1024); + let mut buf = Vec::with_capacity(cap as usize); + + // Copied from futures::ReadToEnd + match futures_core::ready!(poll_read_all_internal(self, cx, &mut buf)) { + Ok(_) => Poll::Ready(Ok(buf)), + Err(err) => Poll::Ready(Err(err)), + } + } + + pub async fn read_all(&mut self) -> io::Result> { + // Preallocate some data but don't let ourselves get too crazy now. 
+ let cap = cmp::min(self.size, 128 * 1024); + let mut v = Vec::with_capacity(cap as usize); + self.read_to_end(&mut v).await.map(|_| v) + } + + fn path(&self) -> io::Result> { + bytes2path(self.path_bytes()) + } + + fn path_bytes(&self) -> Cow<[u8]> { + match self.long_pathname { + Some(ref bytes) => { + if let Some(&0) = bytes.last() { + Cow::Borrowed(&bytes[..bytes.len() - 1]) + } else { + Cow::Borrowed(bytes) + } + } + None => { + if let Some(ref pax) = self.pax_extensions { + let pax = pax_extensions(pax) + .filter_map(|f| f.ok()) + .find(|f| f.key_bytes() == b"path") + .map(|f| f.value_bytes()); + if let Some(field) = pax { + return Cow::Borrowed(field); + } + } + self.header.path_bytes() + } + } + } + + /// Gets the path in a "lossy" way, used for error reporting ONLY. + fn path_lossy(&self) -> String { + String::from_utf8_lossy(&self.path_bytes()).to_string() + } + + fn link_name(&self) -> io::Result>> { + match self.link_name_bytes() { + Some(bytes) => bytes2path(bytes).map(Some), + None => Ok(None), + } + } + + fn link_name_bytes(&self) -> Option> { + match self.long_linkname { + Some(ref bytes) => { + if let Some(&0) = bytes.last() { + Some(Cow::Borrowed(&bytes[..bytes.len() - 1])) + } else { + Some(Cow::Borrowed(bytes)) + } + } + None => self.header.link_name_bytes(), + } + } + + async fn pax_extensions(&mut self) -> io::Result>> { + if self.pax_extensions.is_none() { + if !self.header.entry_type().is_pax_global_extensions() + && !self.header.entry_type().is_pax_local_extensions() + { + return Ok(None); + } + self.pax_extensions = Some(self.read_all().await?); + } + Ok(Some(pax_extensions(self.pax_extensions.as_ref().unwrap()))) + } + + async fn unpack_in(&mut self, dst: &Path) -> io::Result { + // Notes regarding bsdtar 2.8.3 / libarchive 2.8.3: + // * Leading '/'s are trimmed. For example, `///test` is treated as + // `test`. + // * If the filename contains '..', then the file is skipped when + // extracting the tarball. 
+ // * '//' within a filename is effectively skipped. An error is + // logged, but otherwise the effect is as if any two or more + // adjacent '/'s within the filename were consolidated into one + // '/'. + // + // Most of this is handled by the `path` module of the standard + // library, but we specially handle a few cases here as well. + + let mut file_dst = dst.to_path_buf(); + { + let path = self.path().map_err(|e| { + TarError::new( + &format!("invalid path in entry header: {}", self.path_lossy()), + e, + ) + })?; + for part in path.components() { + match part { + // Leading '/' characters, root paths, and '.' + // components are just ignored and treated as "empty + // components" + Component::Prefix(..) | Component::RootDir | Component::CurDir => continue, + + // If any part of the filename is '..', then skip over + // unpacking the file to prevent directory traversal + // security issues. See, e.g.: CVE-2001-1267, + // CVE-2002-0399, CVE-2005-1918, CVE-2007-4131 + Component::ParentDir => return Ok(false), + + Component::Normal(part) => file_dst.push(part), + } + } + } + + // Skip cases where only slashes or '.' parts were seen, because + // this is effectively an empty filename. + if *dst == *file_dst { + return Ok(true); + } + + // Skip entries without a parent (i.e. outside of FS root) + let parent = match file_dst.parent() { + Some(p) => p, + None => return Ok(false), + }; + + if parent.symlink_metadata().is_err() { + // The parent could not be stat'ed (typically it does not exist yet): create it and any missing ancestors. + fs::create_dir_all(&parent).await.map_err(|e| { + TarError::new(&format!("failed to create `{}`", parent.display()), e) + })?; + } + + let canon_target = self.validate_inside_dst(&dst, parent).await?; + + self.unpack(Some(&canon_target), &file_dst) + .await + .map_err(|e| TarError::new(&format!("failed to unpack `{}`", file_dst.display()), e))?; + + Ok(true) + } + + /// Unpack as destination directory `dst`. 
+ async fn unpack_dir(&mut self, dst: &Path) -> io::Result<()> { + // If the directory already exists just let it slide + match fs::create_dir(dst).await { + Ok(()) => Ok(()), + Err(err) => { + if err.kind() == ErrorKind::AlreadyExists { + let prev = fs::metadata(dst).await; + if prev.map(|m| m.is_dir()).unwrap_or(false) { + return Ok(()); + } + } + Err(Error::new( + err.kind(), + format!("{} when creating dir {}", err, dst.display()), + )) + } + } + } + + /// Writes this entry to `dst`, dispatching on the header's entry type (directory, hard link, symlink, or file data). + async fn unpack(&mut self, target_base: Option<&Path>, dst: &Path) -> io::Result { + let kind = self.header.entry_type(); + + if kind.is_dir() { + self.unpack_dir(dst).await?; + if let Ok(mode) = self.header.mode() { + set_perms(dst, None, mode, self.preserve_permissions).await?; + } + return Ok(Unpacked::Other); + } else if kind.is_hard_link() || kind.is_symlink() { + let src = match self.link_name()? { + Some(name) => name, + None => { + return Err(other(&format!( + "hard link listed for {} but no link name found", + String::from_utf8_lossy(self.header.as_bytes()) + ))); + } + }; + + if src.iter().count() == 0 { + return Err(other(&format!( + "symlink destination for {} is empty", + String::from_utf8_lossy(self.header.as_bytes()) + ))); + } + + if kind.is_hard_link() { + let link_src = match target_base { + // If we're unpacking within a directory then ensure that + // the destination of this hard link is both present and + // inside our own directory. This is needed because we want + // to make sure to not overwrite anything outside the root. + // + // Note that this logic is only needed for hard links + // currently. With symlinks the `validate_inside_dst` which + // happens before this method as part of `unpack_in` will + // use canonicalization to ensure this guarantee. For hard + // links though they're canonicalized to their existing path + // so we need to validate at this time. 
+ Some(ref p) => { + let link_src = p.join(src); + self.validate_inside_dst(p, &link_src).await?; + link_src + } + None => src.into_owned(), + }; + fs::hard_link(&link_src, dst).await.map_err(|err| { + Error::new( + err.kind(), + format!( + "{} when hard linking {} to {}", + err, + link_src.display(), + dst.display() + ), + ) + })?; + } else { + symlink(&src, dst).await.map_err(|err| { + Error::new( + err.kind(), + format!( + "{} when symlinking {} to {}", + err, + src.display(), + dst.display() + ), + ) + })?; + }; + return Ok(Unpacked::Other); + + #[cfg(target_arch = "wasm32")] + #[allow(unused_variables)] + async fn symlink(src: &Path, dst: &Path) -> io::Result<()> { + Err(io::Error::new(io::ErrorKind::Other, "Not implemented")) + } + + #[cfg(windows)] + async fn symlink(src: &Path, dst: &Path) -> io::Result<()> { + tokio::fs::os::windows::symlink_file(src, dst).await + } + + #[cfg(any(unix, target_os = "redox"))] + async fn symlink(src: &Path, dst: &Path) -> io::Result<()> { + tokio::fs::symlink(src, dst).await + } + } else if kind.is_pax_global_extensions() + || kind.is_pax_local_extensions() + || kind.is_gnu_longname() + || kind.is_gnu_longlink() + { + return Ok(Unpacked::Other); + }; + + // Old BSD-tar compatibility. + // Names that have a trailing slash should be treated as a directory. + // Only applies to old headers. + if self.header.as_ustar().is_none() && self.path_bytes().ends_with(b"/") { + self.unpack_dir(dst).await?; + if let Ok(mode) = self.header.mode() { + set_perms(dst, None, mode, self.preserve_permissions).await?; + } + return Ok(Unpacked::Other); + } + + // Note the lack of `else` clause above. According to the FreeBSD + // documentation: + // + // > A POSIX-compliant implementation must treat any unrecognized + // > typeflag value as a regular file. + // + // As a result if we don't recognize the kind we just write out the file + // as we would normally. 
+ + // Ensure we write a new file rather than overwriting in-place which + // is attackable; if an existing file is found unlink it. + async fn open(dst: &Path) -> io::Result { + OpenOptions::new() + .write(true) + .create_new(true) + .open(dst) + .await + } + + let mut f = async { + let mut f = match open(dst).await { + Ok(f) => Ok(f), + Err(err) => { + if err.kind() != ErrorKind::AlreadyExists { + Err(err) + } else { + match fs::remove_file(dst).await { + Ok(()) => open(dst).await, + Err(ref e) if e.kind() == io::ErrorKind::NotFound => open(dst).await, + Err(e) => Err(e), + } + } + } + }?; + for io in self.data.drain(..) { + match io { + EntryIo::Data(mut d) => { + let expected = d.limit(); + if io::copy(&mut d, &mut f).await? != expected { + return Err(other("failed to write entire file")); + } + } + EntryIo::Pad(d) => { + // TODO: checked cast to i64 + let to = SeekFrom::Current(d.limit() as i64); + let size = f.seek(to).await?; + f.set_len(size).await?; + } + } + } + Ok::(f) + } + .await + .map_err(|e| { + let header = self.header.path_bytes(); + TarError::new( + &format!( + "failed to unpack `{}` into `{}`", + String::from_utf8_lossy(&header), + dst.display() + ), + e, + ) + })?; + + if self.preserve_mtime { + if let Ok(mtime) = self.header.mtime() { + let mtime = FileTime::from_unix_time(mtime as i64, 0); + filetime::set_file_times(&dst, mtime, mtime).map_err(|e| { + TarError::new(&format!("failed to set mtime for `{}`", dst.display()), e) + })?; + } + } + if let Ok(mode) = self.header.mode() { + set_perms(dst, Some(&mut f), mode, self.preserve_permissions).await?; + } + if self.unpack_xattrs { + set_xattrs(self, dst).await?; + } + return Ok(Unpacked::File(f)); + + async fn set_perms( + dst: &Path, + f: Option<&mut fs::File>, + mode: u32, + preserve: bool, + ) -> Result<(), TarError> { + _set_perms(dst, f, mode, preserve).await.map_err(|e| { + TarError::new( + &format!( + "failed to set permissions to {:o} \ + for `{}`", + mode, + dst.display() + ), + e, + ) 
+ }) + } + + #[cfg(any(unix, target_os = "redox"))] + async fn _set_perms( + dst: &Path, + f: Option<&mut fs::File>, + mode: u32, + preserve: bool, + ) -> io::Result<()> { + use std::os::unix::prelude::*; + + let mode = if preserve { mode } else { mode & 0o777 }; + let perm = std::fs::Permissions::from_mode(mode as _); + match f { + Some(f) => f.set_permissions(perm).await, + None => fs::set_permissions(dst, perm).await, + } + } + + #[cfg(windows)] + async fn _set_perms( + dst: &Path, + f: Option<&mut fs::File>, + mode: u32, + _preserve: bool, + ) -> io::Result<()> { + if mode & 0o200 == 0o200 { + return Ok(()); + } + match f { + Some(f) => { + let mut perm = f.metadata().await?.permissions(); + perm.set_readonly(true); + f.set_permissions(perm).await + } + None => { + let mut perm = fs::metadata(dst).await?.permissions(); + perm.set_readonly(true); + fs::set_permissions(dst, perm).await + } + } + } + + #[cfg(target_arch = "wasm32")] + #[allow(unused_variables)] + async fn _set_perms( + dst: &Path, + f: Option<&mut fs::File>, + mode: u32, + _preserve: bool, + ) -> io::Result<()> { + Err(io::Error::new(io::ErrorKind::Other, "Not implemented")) + } + + #[cfg(all(unix, feature = "xattr"))] + async fn set_xattrs( + me: &mut EntryFields, + dst: &Path, + ) -> io::Result<()> { + use std::{ffi::OsStr, os::unix::prelude::*}; + + let exts = match me.pax_extensions().await { + Ok(Some(e)) => e, + _ => return Ok(()), + }; + let exts = exts + .filter_map(|e| e.ok()) + .filter_map(|e| { + let key = e.key_bytes(); + let prefix = b"SCHILY.xattr."; + if key.starts_with(prefix) { + Some((&key[prefix.len()..], e)) + } else { + None + } + }) + .map(|(key, e)| (OsStr::from_bytes(key), e.value_bytes())); + + for (key, value) in exts { + xattr::set(dst, key, value).map_err(|e| { + TarError::new( + &format!( + "failed to set extended \ + attributes to {}. 
\ + Xattrs: key={:?}, value={:?}.", + dst.display(), + key, + String::from_utf8_lossy(value) + ), + e, + ) + })?; + } + + Ok(()) + } + // Windows does not completely support posix xattrs + // https://en.wikipedia.org/wiki/Extended_file_attributes#Windows_NT + #[cfg(any( + windows, + target_os = "redox", + not(feature = "xattr"), + target_arch = "wasm32" + ))] + async fn set_xattrs(_: &mut EntryFields, _: &Path) -> io::Result<()> { + Ok(()) + } + } + + async fn validate_inside_dst(&self, dst: &Path, file_dst: &Path) -> io::Result { + // Abort if target (canonical) parent is outside of `dst` + let canon_parent = file_dst.canonicalize().map_err(|err| { + Error::new( + err.kind(), + format!("{} while canonicalizing {}", err, file_dst.display()), + ) + })?; + let canon_target = dst.canonicalize().map_err(|err| { + Error::new( + err.kind(), + format!("{} while canonicalizing {}", err, dst.display()), + ) + })?; + if !canon_parent.starts_with(&canon_target) { + let err = TarError::new( + &format!( + "trying to unpack outside of destination path: {}", + canon_target.display() + ), + // TODO: use ErrorKind::InvalidInput here? 
(minor breaking change) + Error::new(ErrorKind::Other, "Invalid argument"), + ); + return Err(err.into()); + } + Ok(canon_target) + } +} + +impl Read for EntryFields { + fn poll_read( + self: Pin<&mut Self>, + cx: &mut Context<'_>, + into: &mut io::ReadBuf<'_>, + ) -> Poll> { + let mut this = self.get_mut(); + loop { + if this.read_state.is_none() { + if this.data.is_empty() { + this.read_state = None; + } else { + let data = &mut this.data; + this.read_state = Some(data.remove(0)); + } + } + + if let Some(ref mut io) = &mut this.read_state { + let ret = Pin::new(io).poll_read(cx, into); + match ret { + Poll::Ready(Ok(())) if into.filled().is_empty() => { + this.read_state = None; + if this.data.is_empty() { + return Poll::Ready(Ok(())); + } + continue; + } + Poll::Ready(Ok(())) => { + return Poll::Ready(Ok(())); + } + Poll::Ready(Err(err)) => { + return Poll::Ready(Err(err)); + } + Poll::Pending => { + return Poll::Pending; + } + } + } else { + // Unable to pull another value from `data`, so we are done. 
+ return Poll::Ready(Ok(())); + } + } + } +} + +impl Read for EntryIo { + fn poll_read( + self: Pin<&mut Self>, + cx: &mut Context<'_>, + into: &mut io::ReadBuf<'_>, + ) -> Poll> { + match self.get_mut() { + EntryIo::Pad(ref mut io) => Pin::new(io).poll_read(cx, into), + EntryIo::Data(ref mut io) => Pin::new(io).poll_read(cx, into), + } + } +} + +struct Guard<'a> { + buf: &'a mut Vec, + len: usize, +} + +impl Drop for Guard<'_> { + fn drop(&mut self) { + unsafe { + self.buf.set_len(self.len); + } + } +} + +fn poll_read_all_internal( + mut rd: Pin<&mut R>, + cx: &mut Context<'_>, + buf: &mut Vec, +) -> Poll> { + let mut g = Guard { + len: buf.len(), + buf, + }; + let ret; + loop { + if g.len == g.buf.len() { + unsafe { + g.buf.reserve(32); + let capacity = g.buf.capacity(); + g.buf.set_len(capacity); + + let buf = &mut g.buf[g.len..]; + std::ptr::write_bytes(buf.as_mut_ptr(), 0, buf.len()); + } + } + + let mut read_buf = io::ReadBuf::new(&mut g.buf[g.len..]); + match futures_core::ready!(rd.as_mut().poll_read(cx, &mut read_buf)) { + Ok(()) if read_buf.filled().is_empty() => { + ret = Poll::Ready(Ok(g.len)); + break; + } + Ok(()) => g.len += read_buf.filled().len(), + Err(e) => { + ret = Poll::Ready(Err(e)); + break; + } + } + } + + ret +} diff --git a/src/entry_type.rs b/src/entry_type.rs new file mode 100644 index 00000000..8c1106a5 --- /dev/null +++ b/src/entry_type.rs @@ -0,0 +1,189 @@ +// See https://en.wikipedia.org/wiki/Tar_%28computing%29#UStar_format +/// Indicate for the type of file described by a header. +/// +/// Each `Header` has an `entry_type` method returning an instance of this type +/// which can be used to inspect what the header is describing. 
+ +/// A non-exhaustive enum representing the possible entry types +#[derive(Clone, Copy, PartialEq, Eq, Debug)] +#[non_exhaustive] +pub enum EntryType { + /// Regular file + Regular, + /// Hard link + Link, + /// Symbolic link + Symlink, + /// Character device + Char, + /// Block device + Block, + /// Directory + Directory, + /// Named pipe (fifo) + Fifo, + /// Implementation-defined 'high-performance' type, treated as regular file + Continuous, + /// GNU extension - long file name + GNULongName, + /// GNU extension - long link name (link target) + GNULongLink, + /// GNU extension - sparse file + GNUSparse, + /// Global extended header + XGlobalHeader, + /// Extended Header + XHeader, + /// Unknown header, + Other(u8), +} + +impl EntryType { + /// Creates a new entry type from a raw byte. + /// + /// Note that the other named constructors of entry type may be more + /// appropriate to create a file type from. + pub fn new(byte: u8) -> EntryType { + match byte { + b'\x00' | b'0' => EntryType::Regular, + b'1' => EntryType::Link, + b'2' => EntryType::Symlink, + b'3' => EntryType::Char, + b'4' => EntryType::Block, + b'5' => EntryType::Directory, + b'6' => EntryType::Fifo, + b'7' => EntryType::Continuous, + b'x' => EntryType::XHeader, + b'g' => EntryType::XGlobalHeader, + b'L' => EntryType::GNULongName, + b'K' => EntryType::GNULongLink, + b'S' => EntryType::GNUSparse, + other => EntryType::Other(other), + } + } + + /// Returns the raw underlying byte that this entry type represents. 
+ pub fn as_byte(self) -> u8 { + match self { + EntryType::Regular => b'0', + EntryType::Link => b'1', + EntryType::Symlink => b'2', + EntryType::Char => b'3', + EntryType::Block => b'4', + EntryType::Directory => b'5', + EntryType::Fifo => b'6', + EntryType::Continuous => b'7', + EntryType::XHeader => b'x', + EntryType::XGlobalHeader => b'g', + EntryType::GNULongName => b'L', + EntryType::GNULongLink => b'K', + EntryType::GNUSparse => b'S', + EntryType::Other(other) => other, + } + } + + /// Creates a new entry type representing a regular file. + pub fn file() -> EntryType { + EntryType::Regular + } + + /// Creates a new entry type representing a hard link. + pub fn hard_link() -> EntryType { + EntryType::Link + } + + /// Creates a new entry type representing a symlink. + pub fn symlink() -> EntryType { + EntryType::Symlink + } + + /// Creates a new entry type representing a character special device. + pub fn character_special() -> EntryType { + EntryType::Char + } + + /// Creates a new entry type representing a block special device. + pub fn block_special() -> EntryType { + EntryType::Block + } + + /// Creates a new entry type representing a directory. + pub fn dir() -> EntryType { + EntryType::Directory + } + + /// Creates a new entry type representing a FIFO. + pub fn fifo() -> EntryType { + EntryType::Fifo + } + + /// Creates a new entry type representing a contiguous file. + pub fn contiguous() -> EntryType { + EntryType::Continuous + } + + /// Returns whether this type represents a regular file. + pub fn is_file(self) -> bool { + self == EntryType::Regular + } + + /// Returns whether this type represents a hard link. + pub fn is_hard_link(self) -> bool { + self == EntryType::Link + } + + /// Returns whether this type represents a symlink. + pub fn is_symlink(self) -> bool { + self == EntryType::Symlink + } + + /// Returns whether this type represents a character special device. 
+ pub fn is_character_special(self) -> bool { + self == EntryType::Char + } + + /// Returns whether this type represents a block special device. + pub fn is_block_special(self) -> bool { + self == EntryType::Block + } + + /// Returns whether this type represents a directory. + pub fn is_dir(self) -> bool { + self == EntryType::Directory + } + + /// Returns whether this type represents a FIFO. + pub fn is_fifo(self) -> bool { + self == EntryType::Fifo + } + + /// Returns whether this type represents a contiguous file. + pub fn is_contiguous(self) -> bool { + self == EntryType::Continuous + } + + /// Returns whether this type represents a GNU long name header. + pub fn is_gnu_longname(self) -> bool { + self == EntryType::GNULongName + } + + /// Returns whether this type represents a GNU sparse header. + pub fn is_gnu_sparse(self) -> bool { + self == EntryType::GNUSparse + } + + /// Returns whether this type represents a GNU long link header. + pub fn is_gnu_longlink(self) -> bool { + self == EntryType::GNULongLink + } + + /// Returns whether this type represents a pax global extended header. + pub fn is_pax_global_extensions(self) -> bool { + self == EntryType::XGlobalHeader + } + + /// Returns whether this type represents a pax (local) extended header. 
+ pub fn is_pax_local_extensions(self) -> bool { + self == EntryType::XHeader + } +} diff --git a/src/error.rs b/src/error.rs new file mode 100644 index 00000000..ab24583b --- /dev/null +++ b/src/error.rs @@ -0,0 +1,40 @@ +use std::{error, fmt}; + +use tokio::io::{self, Error}; + +#[derive(Debug)] +pub struct TarError { + desc: String, + io: io::Error, +} + +impl TarError { + pub fn new(desc: &str, err: Error) -> TarError { + TarError { + desc: desc.to_string(), + io: err, + } + } +} + +impl error::Error for TarError { + fn description(&self) -> &str { + &self.desc + } + + fn source(&self) -> Option<&(dyn error::Error + 'static)> { + Some(&self.io) + } +} + +impl fmt::Display for TarError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + self.desc.fmt(f) + } +} + +impl From for Error { + fn from(t: TarError) -> Error { + Error::new(t.io.kind(), t) + } +} diff --git a/src/header.rs b/src/header.rs new file mode 100644 index 00000000..71f0deed --- /dev/null +++ b/src/header.rs @@ -0,0 +1,1620 @@ +#[cfg(any(unix, target_os = "redox"))] +use std::os::unix::prelude::*; +#[cfg(windows)] +use std::os::windows::prelude::*; + +use std::{borrow::Cow, fmt, iter, iter::repeat, mem, str}; + +use std::{ + fs::Metadata, + path::{Component, Path, PathBuf}, +}; +use tokio::io; + +use crate::{other, EntryType}; + +/// Representation of the header of an entry in an archive +#[repr(C)] +#[allow(missing_docs)] +pub struct Header { + bytes: [u8; 512], +} + +/// Declares the information that should be included when filling a Header +/// from filesystem metadata. +#[derive(Clone, Copy, PartialEq, Eq, Debug)] +#[non_exhaustive] +pub enum HeaderMode { + /// All supported metadata, including mod/access times and ownership will + /// be included. + Complete, + + /// Only metadata that is directly relevant to the identity of a file will + /// be included. In particular, ownership and mod/access times are excluded. 
+ Deterministic, +} + +/// Representation of the header of an entry in the original ("old") archive format +#[repr(C)] +#[allow(missing_docs)] +pub struct OldHeader { + pub name: [u8; 100], + pub mode: [u8; 8], + pub uid: [u8; 8], + pub gid: [u8; 8], + pub size: [u8; 12], + pub mtime: [u8; 12], + pub cksum: [u8; 8], + pub linkflag: [u8; 1], + pub linkname: [u8; 100], + pub pad: [u8; 255], +} + +/// Representation of the header of an entry in the UStar archive format +#[repr(C)] +#[allow(missing_docs)] +pub struct UstarHeader { + pub name: [u8; 100], + pub mode: [u8; 8], + pub uid: [u8; 8], + pub gid: [u8; 8], + pub size: [u8; 12], + pub mtime: [u8; 12], + pub cksum: [u8; 8], + pub typeflag: [u8; 1], + pub linkname: [u8; 100], + + // UStar format + pub magic: [u8; 6], + pub version: [u8; 2], + pub uname: [u8; 32], + pub gname: [u8; 32], + pub dev_major: [u8; 8], + pub dev_minor: [u8; 8], + pub prefix: [u8; 155], + pub pad: [u8; 12], +} + +/// Representation of the header of an entry in the GNU archive format +#[repr(C)] +#[allow(missing_docs)] +pub struct GnuHeader { + pub name: [u8; 100], + pub mode: [u8; 8], + pub uid: [u8; 8], + pub gid: [u8; 8], + pub size: [u8; 12], + pub mtime: [u8; 12], + pub cksum: [u8; 8], + pub typeflag: [u8; 1], + pub linkname: [u8; 100], + + // GNU format + pub magic: [u8; 6], + pub version: [u8; 2], + pub uname: [u8; 32], + pub gname: [u8; 32], + pub dev_major: [u8; 8], + pub dev_minor: [u8; 8], + pub atime: [u8; 12], + pub ctime: [u8; 12], + pub offset: [u8; 12], + pub longnames: [u8; 4], + pub unused: [u8; 1], + pub sparse: [GnuSparseHeader; 4], + pub isextended: [u8; 1], + pub realsize: [u8; 12], + pub pad: [u8; 17], +} + +/// Description of the header of a sparse entry. +/// +/// Specifies the offset/number of bytes of a chunk of data in octal. +#[repr(C)] +#[allow(missing_docs)] +pub struct GnuSparseHeader { + pub offset: [u8; 12], + pub numbytes: [u8; 12], +} + +/// Representation of the entry found to represent extended GNU sparse files. 
+/// +/// When a `GnuHeader` has the `isextended` flag set to `1` then the contents of +/// the next entry will be one of these headers. +#[repr(C)] +#[allow(missing_docs)] +pub struct GnuExtSparseHeader { + pub sparse: [GnuSparseHeader; 21], + pub isextended: [u8; 1], + pub padding: [u8; 7], +} + +impl Header { + /// Creates a new blank GNU header. + /// + /// The GNU style header is the default for this library and allows various + /// extensions such as long path names, long link names, and setting the + /// atime/ctime metadata attributes of files. + pub fn new_gnu() -> Header { + let mut header = Header { bytes: [0; 512] }; + unsafe { + let gnu = cast_mut::<_, GnuHeader>(&mut header); + gnu.magic = *b"ustar "; + gnu.version = *b" \0"; + } + header.set_mtime(0); + header + } + + /// Creates a new blank UStar header. + /// + /// The UStar style header is an extension of the original archive header + /// which enables some extra metadata along with storing a longer (but not + /// too long) path name. + /// + /// UStar is also the basis used for pax archives. + pub fn new_ustar() -> Header { + let mut header = Header { bytes: [0; 512] }; + unsafe { + let gnu = cast_mut::<_, UstarHeader>(&mut header); + gnu.magic = *b"ustar\0"; + gnu.version = *b"00"; + } + header.set_mtime(0); + header + } + + /// Creates a new blank old header. + /// + /// This header format is the original archive header format which all other + /// versions are compatible with (e.g. they are a superset). This header + /// format limits the path name limit and isn't able to contain extra + /// metadata like atime/ctime. + pub fn new_old() -> Header { + let mut header = Header { bytes: [0; 512] }; + header.set_mtime(0); + header + } + + fn is_ustar(&self) -> bool { + let ustar = unsafe { cast::<_, UstarHeader>(self) }; + ustar.magic[..] == b"ustar\0"[..] && ustar.version[..] == b"00"[..] + } + + fn is_gnu(&self) -> bool { + let ustar = unsafe { cast::<_, UstarHeader>(self) }; + ustar.magic[..] 
== b"ustar "[..] && ustar.version[..] == b" \0"[..] + } + + /// View this archive header as a raw "old" archive header. + /// + /// This view will always succeed as all archive header formats will fill + /// out at least the fields specified in the old header format. + pub fn as_old(&self) -> &OldHeader { + unsafe { cast(self) } + } + + /// Same as `as_old`, but the mutable version. + pub fn as_old_mut(&mut self) -> &mut OldHeader { + unsafe { cast_mut(self) } + } + + /// View this archive header as a raw UStar archive header. + /// + /// The UStar format is an extension to the tar archive format which enables + /// longer pathnames and a few extra attributes such as the group and user + /// name. + /// + /// This cast may not succeed as this function will test whether the + /// magic/version fields of the UStar format have the appropriate values, + /// returning `None` if they aren't correct. + pub fn as_ustar(&self) -> Option<&UstarHeader> { + if self.is_ustar() { + Some(unsafe { cast(self) }) + } else { + None + } + } + + /// Same as `as_ustar`, but the mutable version. + pub fn as_ustar_mut(&mut self) -> Option<&mut UstarHeader> { + if self.is_ustar() { + Some(unsafe { cast_mut(self) }) + } else { + None + } + } + + /// View this archive header as a raw GNU archive header. + /// + /// The GNU format is an extension to the tar archive format which enables + /// longer pathnames and a few extra attributes such as the group and user + /// name. + /// + /// This cast may not succeed as this function will test whether the + /// magic/version fields of the GNU format have the appropriate values, + /// returning `None` if they aren't correct. + pub fn as_gnu(&self) -> Option<&GnuHeader> { + if self.is_gnu() { + Some(unsafe { cast(self) }) + } else { + None + } + } + + /// Same as `as_gnu`, but the mutable version. 
+ pub fn as_gnu_mut(&mut self) -> Option<&mut GnuHeader> { + if self.is_gnu() { + Some(unsafe { cast_mut(self) }) + } else { + None + } + } + + /// Treats the given byte slice as a header. + /// + /// Panics if the length of the passed slice is not equal to 512. + pub fn from_byte_slice(bytes: &[u8]) -> &Header { + assert_eq!(bytes.len(), mem::size_of::
()); + assert_eq!(mem::align_of_val(bytes), mem::align_of::
()); + unsafe { &*(bytes.as_ptr() as *const Header) } + } + + /// Returns a view into this header as a byte array. + pub fn as_bytes(&self) -> &[u8; 512] { + &self.bytes + } + + /// Returns a view into this header as a byte array. + pub fn as_mut_bytes(&mut self) -> &mut [u8; 512] { + &mut self.bytes + } + + /// Blanket sets the metadata in this header from the metadata argument + /// provided. + /// + /// This is useful for initializing a `Header` from the OS's metadata from a + /// file. By default, this will use `HeaderMode::Complete` to include all + /// metadata. + pub fn set_metadata(&mut self, meta: &Metadata) { + self.fill_from(meta, HeaderMode::Complete); + } + + /// Sets only the metadata relevant to the given HeaderMode in this header + /// from the metadata argument provided. + pub fn set_metadata_in_mode(&mut self, meta: &Metadata, mode: HeaderMode) { + self.fill_from(meta, mode); + } + + /// Returns the size of entry's data this header represents. + /// + /// This is different from `Header::size` for sparse files, which have + /// some longer `size()` but shorter `entry_size()`. The `entry_size()` + /// listed here should be the number of bytes in the archive this header + /// describes. + /// + /// May return an error if the field is corrupted. + pub fn entry_size(&self) -> io::Result { + num_field_wrapper_from(&self.as_old().size).map_err(|err| { + io::Error::new( + err.kind(), + format!("{} when getting size for {}", err, self.path_lossy()), + ) + }) + } + + /// Returns the file size this header represents. + /// + /// May return an error if the field is corrupted. + pub fn size(&self) -> io::Result { + if self.entry_type().is_gnu_sparse() { + self.as_gnu() + .ok_or_else(|| other("sparse header was not a gnu header")) + .and_then(|h| h.real_size()) + } else { + self.entry_size() + } + } + + /// Encodes the `size` argument into the size field of this header. 
+ pub fn set_size(&mut self, size: u64) { + num_field_wrapper_into(&mut self.as_old_mut().size, size); + } + + /// Returns the raw path name stored in this header. + /// + /// This method may fail if the pathname is not valid Unicode and this is + /// called on a Windows platform. + /// + /// Note that this function will convert any `\` characters to directory + /// separators. + pub fn path(&self) -> io::Result> { + bytes2path(self.path_bytes()) + } + + /// Returns the pathname stored in this header as a byte array. + /// + /// This function is guaranteed to succeed, but you may wish to call the + /// `path` method to convert to a `Path`. + /// + /// Note that this function will convert any `\` characters to directory + /// separators. + pub fn path_bytes(&self) -> Cow<[u8]> { + if let Some(ustar) = self.as_ustar() { + ustar.path_bytes() + } else { + let name = truncate(&self.as_old().name); + Cow::Borrowed(name) + } + } + + /// Gets the path in a "lossy" way, used for error reporting ONLY. + fn path_lossy(&self) -> String { + String::from_utf8_lossy(&self.path_bytes()).to_string() + } + + /// Sets the path name for this header. + /// + /// This function will set the pathname listed in this header, encoding it + /// in the appropriate format. May fail if the path is too long or if the + /// path specified is not Unicode and this is a Windows platform. + pub fn set_path>(&mut self, p: P) -> io::Result<()> { + self._set_path(p.as_ref()) + } + + fn _set_path(&mut self, path: &Path) -> io::Result<()> { + if let Some(ustar) = self.as_ustar_mut() { + return ustar.set_path(path); + } + copy_path_into(&mut self.as_old_mut().name, path, false).map_err(|err| { + io::Error::new( + err.kind(), + format!("{} when setting path for {}", err, self.path_lossy()), + ) + }) + } + + /// Returns the link name stored in this header, if any is found. + /// + /// This method may fail if the pathname is not valid Unicode and this is + /// called on a Windows platform. 
`Ok(None)` being returned, however, + /// indicates that the link name was not present. + /// + /// Note that this function will convert any `\` characters to directory + /// separators. + pub fn link_name(&self) -> io::Result>> { + match self.link_name_bytes() { + Some(bytes) => bytes2path(bytes).map(Some), + None => Ok(None), + } + } + + /// Returns the link name stored in this header as a byte array, if any. + /// + /// This function is guaranteed to succeed, but you may wish to call the + /// `link_name` method to convert to a `Path`. + /// + /// Note that this function will convert any `\` characters to directory + /// separators. + pub fn link_name_bytes(&self) -> Option> { + let old = self.as_old(); + if old.linkname[0] != 0 { + Some(Cow::Borrowed(truncate(&old.linkname))) + } else { + None + } + } + + /// Sets the link name for this header. + /// + /// This function will set the linkname listed in this header, encoding it + /// in the appropriate format. May fail if the link name is too long or if + /// the path specified is not Unicode and this is a Windows platform. + pub fn set_link_name>(&mut self, p: P) -> io::Result<()> { + self._set_link_name(p.as_ref()) + } + + fn _set_link_name(&mut self, path: &Path) -> io::Result<()> { + copy_path_into(&mut self.as_old_mut().linkname, path, true).map_err(|err| { + io::Error::new( + err.kind(), + format!("{} when setting link name for {}", err, self.path_lossy()), + ) + }) + } + + /// Returns the mode bits for this file + /// + /// May return an error if the field is corrupted. + pub fn mode(&self) -> io::Result { + octal_from(&self.as_old().mode) + .map(|u| u as u32) + .map_err(|err| { + io::Error::new( + err.kind(), + format!("{} when getting mode for {}", err, self.path_lossy()), + ) + }) + } + + /// Encodes the `mode` provided into this header. 
+ pub fn set_mode(&mut self, mode: u32) { + octal_into(&mut self.as_old_mut().mode, mode); + } + + /// Returns the value of the owner's user ID field + /// + /// May return an error if the field is corrupted. + pub fn uid(&self) -> io::Result { + num_field_wrapper_from(&self.as_old().uid) + .map(|u| u as u64) + .map_err(|err| { + io::Error::new( + err.kind(), + format!("{} when getting uid for {}", err, self.path_lossy()), + ) + }) + } + + /// Encodes the `uid` provided into this header. + pub fn set_uid(&mut self, uid: u64) { + num_field_wrapper_into(&mut self.as_old_mut().uid, uid); + } + + /// Returns the value of the group's user ID field + pub fn gid(&self) -> io::Result { + num_field_wrapper_from(&self.as_old().gid) + .map(|u| u as u64) + .map_err(|err| { + io::Error::new( + err.kind(), + format!("{} when getting gid for {}", err, self.path_lossy()), + ) + }) + } + + /// Encodes the `gid` provided into this header. + pub fn set_gid(&mut self, gid: u64) { + num_field_wrapper_into(&mut self.as_old_mut().gid, gid); + } + + /// Returns the last modification time in Unix time format + pub fn mtime(&self) -> io::Result { + num_field_wrapper_from(&self.as_old().mtime).map_err(|err| { + io::Error::new( + err.kind(), + format!("{} when getting mtime for {}", err, self.path_lossy()), + ) + }) + } + + /// Encodes the `mtime` provided into this header. + /// + /// Note that this time is typically a number of seconds passed since + /// January 1, 1970. + pub fn set_mtime(&mut self, mtime: u64) { + num_field_wrapper_into(&mut self.as_old_mut().mtime, mtime); + } + + /// Return the user name of the owner of this file. + /// + /// A return value of `Ok(Some(..))` indicates that the user name was + /// present and was valid utf-8, `Ok(None)` indicates that the user name is + /// not present in this archive format, and `Err` indicates that the user + /// name was present but was not valid utf-8. 
+ pub fn username(&self) -> Result, str::Utf8Error> { + match self.username_bytes() { + Some(bytes) => str::from_utf8(bytes).map(Some), + None => Ok(None), + } + } + + /// Returns the user name of the owner of this file, if present. + /// + /// A return value of `None` indicates that the user name is not present in + /// this header format. + pub fn username_bytes(&self) -> Option<&[u8]> { + if let Some(ustar) = self.as_ustar() { + Some(ustar.username_bytes()) + } else if let Some(gnu) = self.as_gnu() { + Some(gnu.username_bytes()) + } else { + None + } + } + + /// Sets the username inside this header. + /// + /// This function will return an error if this header format cannot encode a + /// user name or the name is too long. + pub fn set_username(&mut self, name: &str) -> io::Result<()> { + if let Some(ustar) = self.as_ustar_mut() { + return ustar.set_username(name); + } + if let Some(gnu) = self.as_gnu_mut() { + gnu.set_username(name) + } else { + Err(other("not a ustar or gnu archive, cannot set username")) + } + } + + /// Return the group name of the owner of this file. + /// + /// A return value of `Ok(Some(..))` indicates that the group name was + /// present and was valid utf-8, `Ok(None)` indicates that the group name is + /// not present in this archive format, and `Err` indicates that the group + /// name was present but was not valid utf-8. + pub fn groupname(&self) -> Result, str::Utf8Error> { + match self.groupname_bytes() { + Some(bytes) => str::from_utf8(bytes).map(Some), + None => Ok(None), + } + } + + /// Returns the group name of the owner of this file, if present. + /// + /// A return value of `None` indicates that the group name is not present in + /// this header format. + pub fn groupname_bytes(&self) -> Option<&[u8]> { + if let Some(ustar) = self.as_ustar() { + Some(ustar.groupname_bytes()) + } else if let Some(gnu) = self.as_gnu() { + Some(gnu.groupname_bytes()) + } else { + None + } + } + + /// Sets the group name inside this header. 
+ /// + /// This function will return an error if this header format cannot encode a + /// group name or the name is too long. + pub fn set_groupname(&mut self, name: &str) -> io::Result<()> { + if let Some(ustar) = self.as_ustar_mut() { + return ustar.set_groupname(name); + } + if let Some(gnu) = self.as_gnu_mut() { + gnu.set_groupname(name) + } else { + Err(other("not a ustar or gnu archive, cannot set groupname")) + } + } + + /// Returns the device major number, if present. + /// + /// This field may not be present in all archives, and it may not be + /// correctly formed in all archives. `Ok(Some(..))` means it was present + /// and correctly decoded, `Ok(None)` indicates that this header format does + /// not include the device major number, and `Err` indicates that it was + /// present and failed to decode. + pub fn device_major(&self) -> io::Result> { + if let Some(ustar) = self.as_ustar() { + ustar.device_major().map(Some) + } else if let Some(gnu) = self.as_gnu() { + gnu.device_major().map(Some) + } else { + Ok(None) + } + } + + /// Encodes the value `major` into the dev_major field of this header. + /// + /// This function will return an error if this header format cannot encode a + /// major device number. + pub fn set_device_major(&mut self, major: u32) -> io::Result<()> { + if let Some(ustar) = self.as_ustar_mut() { + ustar.set_device_major(major); + Ok(()) + } else if let Some(gnu) = self.as_gnu_mut() { + gnu.set_device_major(major); + Ok(()) + } else { + Err(other("not a ustar or gnu archive, cannot set dev_major")) + } + } + + /// Returns the device minor number, if present. + /// + /// This field may not be present in all archives, and it may not be + /// correctly formed in all archives. `Ok(Some(..))` means it was present + /// and correctly decoded, `Ok(None)` indicates that this header format does + /// not include the device minor number, and `Err` indicates that it was + /// present and failed to decode. 
+ pub fn device_minor(&self) -> io::Result> { + if let Some(ustar) = self.as_ustar() { + ustar.device_minor().map(Some) + } else if let Some(gnu) = self.as_gnu() { + gnu.device_minor().map(Some) + } else { + Ok(None) + } + } + + /// Encodes the value `minor` into the dev_minor field of this header. + /// + /// This function will return an error if this header format cannot encode a + /// minor device number. + pub fn set_device_minor(&mut self, minor: u32) -> io::Result<()> { + if let Some(ustar) = self.as_ustar_mut() { + ustar.set_device_minor(minor); + Ok(()) + } else if let Some(gnu) = self.as_gnu_mut() { + gnu.set_device_minor(minor); + Ok(()) + } else { + Err(other("not a ustar or gnu archive, cannot set dev_minor")) + } + } + + /// Returns the type of file described by this header. + pub fn entry_type(&self) -> EntryType { + EntryType::new(self.as_old().linkflag[0]) + } + + /// Sets the type of file that will be described by this header. + pub fn set_entry_type(&mut self, ty: EntryType) { + self.as_old_mut().linkflag = [ty.as_byte()]; + } + + /// Returns the checksum field of this header. + /// + /// May return an error if the field is corrupted. + pub fn cksum(&self) -> io::Result { + octal_from(&self.as_old().cksum) + .map(|u| u as u32) + .map_err(|err| { + io::Error::new( + err.kind(), + format!("{} when getting cksum for {}", err, self.path_lossy()), + ) + }) + } + + /// Sets the checksum field of this header based on the current fields in + /// this header. 
pub fn set_cksum(&mut self) {
    let cksum = self.calculate_cksum();
    octal_into(&mut self.as_old_mut().cksum, cksum);
}

/// Computes the checksum of this header: the sum of all header bytes, with
/// the bytes of the `cksum` field itself counted as ASCII spaces.
fn calculate_cksum(&self) -> u32 {
    let old = self.as_old();
    // Locate the `cksum` field inside the raw bytes from the distance between
    // the field's address and the start of the header.
    let start = old as *const _ as usize;
    let cksum_start = old.cksum.as_ptr() as *const _ as usize;
    let offset = cksum_start - start;
    let len = old.cksum.len();
    self.bytes[0..offset]
        .iter()
        .chain(iter::repeat(&b' ').take(len))
        .chain(&self.bytes[offset + len..])
        .fold(0, |a, b| a + (*b as u32))
}

/// Fills this header from `meta`: platform-specific fields first, then the
/// size and (for ustar/GNU headers) zeroed device numbers.
fn fill_from(&mut self, meta: &Metadata, mode: HeaderMode) {
    self.fill_platform_from(meta, mode);
    // Directories and symlinks are recorded with a size of zero.
    self.set_size(if meta.is_dir() || meta.file_type().is_symlink() {
        0
    } else {
        meta.len()
    });
    if let Some(ustar) = self.as_ustar_mut() {
        ustar.set_device_major(0);
        ustar.set_device_minor(0);
    }
    if let Some(gnu) = self.as_gnu_mut() {
        gnu.set_device_major(0);
        gnu.set_device_minor(0);
    }
}

/// Platform-specific part of `fill_from`; not implemented on wasm32.
#[cfg(target_arch = "wasm32")]
#[allow(unused_variables)]
fn fill_platform_from(&mut self, meta: &Metadata, mode: HeaderMode) {
    unimplemented!();
}

/// Platform-specific part of `fill_from` for Unix-like targets: sets mtime,
/// uid, gid, mode and the entry type.
#[cfg(any(unix, target_os = "redox"))]
fn fill_platform_from(&mut self, meta: &Metadata, mode: HeaderMode) {
    match mode {
        HeaderMode::Complete => {
            self.set_mtime(meta.mtime() as u64);
            self.set_uid(meta.uid() as u64);
            self.set_gid(meta.gid() as u64);
            self.set_mode(meta.mode() as u32);
        }
        HeaderMode::Deterministic => {
            self.set_mtime(0);
            self.set_uid(0);
            self.set_gid(0);

            // Use a default umask value, but propagate the (user) execute bit.
            let fs_mode = if meta.is_dir() || (0o100 & meta.mode() == 0o100) {
                0o755
            } else {
                0o644
            };
            self.set_mode(fs_mode);
        }
    }

    // Note that if we are a GNU header we *could* set atime/ctime, except
    // the `tar` utility doesn't do that by default and it causes problems
    // with 7-zip [1].
    //
    // It's always possible to fill them out manually, so we just don't fill
    // it out automatically here.
    //
    // [1]: https://github.com/alexcrichton/tar-rs/issues/70

    // TODO: need to bind more file types
    self.set_entry_type(entry_type(meta.mode()));

    #[cfg(not(target_os = "redox"))]
    fn entry_type(mode: u32) -> EntryType {
        match mode as libc::mode_t & libc::S_IFMT {
            libc::S_IFREG => EntryType::file(),
            libc::S_IFLNK => EntryType::symlink(),
            libc::S_IFCHR => EntryType::character_special(),
            libc::S_IFBLK => EntryType::block_special(),
            libc::S_IFDIR => EntryType::dir(),
            libc::S_IFIFO => EntryType::fifo(),
            _ => EntryType::new(b' '),
        }
    }

    #[cfg(target_os = "redox")]
    fn entry_type(mode: u32) -> EntryType {
        use syscall;
        match mode as u16 & syscall::MODE_TYPE {
            syscall::MODE_FILE => EntryType::file(),
            syscall::MODE_SYMLINK => EntryType::symlink(),
            syscall::MODE_DIR => EntryType::dir(),
            _ => EntryType::new(b' '),
        }
    }
}

/// Platform-specific part of `fill_from` for Windows: sets uid, gid, mtime,
/// an approximated mode and the entry type.
#[cfg(windows)]
fn fill_platform_from(&mut self, meta: &Metadata, mode: HeaderMode) {
    // There's no concept of a file mode on Windows, so do a best approximation here.
    match mode {
        HeaderMode::Complete => {
            self.set_uid(0);
            self.set_gid(0);
            // The dates listed in tarballs are always seconds relative to
            // January 1, 1970. On Windows, however, the timestamps are returned as
            // dates relative to January 1, 1601 (in 100ns intervals), so we need to
            // add in some offset for those dates.
            //
            // Timestamps before 1970 are clamped to 0 rather than underflowing
            // the unsigned subtraction.
            let mtime = (meta.last_write_time() / (1_000_000_000 / 100))
                .saturating_sub(11644473600);
            self.set_mtime(mtime);
            let fs_mode = {
                const FILE_ATTRIBUTE_READONLY: u32 = 0x00000001;
                let readonly = meta.file_attributes() & FILE_ATTRIBUTE_READONLY;
                match (meta.is_dir(), readonly != 0) {
                    (true, false) => 0o755,
                    (true, true) => 0o555,
                    (false, false) => 0o644,
                    (false, true) => 0o444,
                }
            };
            self.set_mode(fs_mode);
        }
        HeaderMode::Deterministic => {
            self.set_uid(0);
            self.set_gid(0);
            self.set_mtime(0);
            let fs_mode = if meta.is_dir() { 0o755 } else { 0o644 };
            self.set_mode(fs_mode);
        }
    }

    let ft = meta.file_type();
    self.set_entry_type(if ft.is_dir() {
        EntryType::dir()
    } else if ft.is_file() {
        EntryType::file()
    } else if ft.is_symlink() {
        EntryType::symlink()
    } else {
        EntryType::new(b' ')
    });
}

/// Adds every field that decodes successfully to the debug struct `b`;
/// fields that fail to decode are left out.
fn debug_fields(&self, b: &mut fmt::DebugStruct) {
    if let Ok(entry_size) = self.entry_size() {
        b.field("entry_size", &entry_size);
    }
    if let Ok(size) = self.size() {
        b.field("size", &size);
    }
    if let Ok(path) = self.path() {
        b.field("path", &path);
    }
    if let Ok(link_name) = self.link_name() {
        b.field("link_name", &link_name);
    }
    if let Ok(mode) = self.mode() {
        b.field("mode", &DebugAsOctal(mode));
    }
    if let Ok(uid) = self.uid() {
        b.field("uid", &uid);
    }
    if let Ok(gid) = self.gid() {
        b.field("gid", &gid);
    }
    if let Ok(mtime) = self.mtime() {
        b.field("mtime", &mtime);
    }
    if let Ok(username) = self.username() {
        b.field("username", &username);
    }
    if let Ok(groupname) = self.groupname() {
        b.field("groupname", &groupname);
    }
    if let Ok(device_major) = self.device_major() {
        b.field("device_major", &device_major);
    }
    if let Ok(device_minor) = self.device_minor() {
        b.field("device_minor", &device_minor);
    }
    if let Ok(cksum) = self.cksum() {
        b.field("cksum", &cksum);
        b.field("cksum_valid", &(cksum == self.calculate_cksum()));
    }
}
}

/// Wrapper whose `Debug` output is the octal rendering of the inner value.
struct DebugAsOctal<T>(T);

impl<T: fmt::Octal> fmt::Debug for DebugAsOctal<T> {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        fmt::Octal::fmt(&self.0, f)
    }
}

/// Reinterprets a reference to `T` as a reference to `U`.
///
/// # Safety
///
/// Only equality of size and alignment is checked (by assertion, at
/// runtime); the caller must ensure the bytes of `T` form a valid `U`.
unsafe fn cast<T, U>(a: &T) -> &U {
    assert_eq!(mem::size_of_val(a), mem::size_of::<U>());
    assert_eq!(mem::align_of_val(a), mem::align_of::<U>());
    &*(a as *const T as *const U)
}

/// Mutable counterpart of `cast`.
///
/// # Safety
///
/// Same contract as `cast`.
unsafe fn cast_mut<T, U>(a: &mut T) -> &mut U {
    assert_eq!(mem::size_of_val(a), mem::size_of::<U>());
    assert_eq!(mem::align_of_val(a), mem::align_of::<U>());
    &mut *(a as *mut T as *mut U)
}

impl Clone for Header {
    fn clone(&self) -> Header {
        Header { bytes: self.bytes }
    }
}

impl fmt::Debug for Header {
    /// Formats through the most specific header view that applies.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        if let Some(me) = self.as_ustar() {
            me.fmt(f)
        } else if let Some(me) = self.as_gnu() {
            me.fmt(f)
        } else {
            self.as_old().fmt(f)
        }
    }
}

impl OldHeader {
    /// Views this as a normal `Header`
    pub fn as_header(&self) -> &Header {
        unsafe { cast(self) }
    }

    /// Views this as a normal `Header`
    pub fn as_header_mut(&mut self) -> &mut Header {
        unsafe { cast_mut(self) }
    }
}

impl fmt::Debug for OldHeader {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let mut f = f.debug_struct("OldHeader");
        self.as_header().debug_fields(&mut f);
        f.finish()
    }
}

impl UstarHeader {
    /// See `Header::path_bytes`
    ///
    /// Borrows the `name` field when `prefix` is empty and `name` contains
    /// no backslash; otherwise builds `prefix`, `/`, `name` in a new buffer
    /// (the `prefix` and `/` are omitted when the prefix is empty).
    pub fn path_bytes(&self) -> Cow<[u8]> {
        if self.prefix[0] == 0 && !self.name.contains(&b'\\') {
            Cow::Borrowed(truncate(&self.name))
        } else {
            let mut bytes = Vec::new();
            let prefix = truncate(&self.prefix);
            if !prefix.is_empty() {
                bytes.extend_from_slice(prefix);
                bytes.push(b'/');
            }
            bytes.extend_from_slice(truncate(&self.name));
            Cow::Owned(bytes)
        }
    }

    /// Gets the path in a "lossy" way, used for error reporting ONLY.
+ fn path_lossy(&self) -> String { + String::from_utf8_lossy(&self.path_bytes()).to_string() + } + + /// See `Header::set_path` + pub fn set_path>(&mut self, p: P) -> io::Result<()> { + self._set_path(p.as_ref()) + } + + fn _set_path(&mut self, path: &Path) -> io::Result<()> { + // This can probably be optimized quite a bit more, but for now just do + // something that's relatively easy and readable. + // + // First up, if the path fits within `self.name` then we just shove it + // in there. If not then we try to split it between some existing path + // components where it can fit in name/prefix. To do that we peel off + // enough until the path fits in `prefix`, then we try to put both + // halves into their destination. + let bytes = path2bytes(path)?; + let (maxnamelen, maxprefixlen) = (self.name.len(), self.prefix.len()); + if bytes.len() <= maxnamelen { + copy_path_into(&mut self.name, path, false).map_err(|err| { + io::Error::new( + err.kind(), + format!("{} when setting path for {}", err, self.path_lossy()), + ) + })?; + } else { + let mut prefix = path; + let mut prefixlen; + loop { + match prefix.parent() { + Some(parent) => prefix = parent, + None => { + return Err(other(&format!( + "path cannot be split to be inserted into archive: {}", + path.display() + ))); + } + } + prefixlen = path2bytes(prefix)?.len(); + if prefixlen <= maxprefixlen { + break; + } + } + copy_path_into(&mut self.prefix, prefix, false).map_err(|err| { + io::Error::new( + err.kind(), + format!("{} when setting path for {}", err, self.path_lossy()), + ) + })?; + let path = bytes2path(Cow::Borrowed(&bytes[prefixlen + 1..]))?; + copy_path_into(&mut self.name, &path, false).map_err(|err| { + io::Error::new( + err.kind(), + format!("{} when setting path for {}", err, self.path_lossy()), + ) + })?; + } + Ok(()) + } + + /// See `Header::username_bytes` + pub fn username_bytes(&self) -> &[u8] { + truncate(&self.uname) + } + + /// See `Header::set_username` + pub fn set_username(&mut self, 
name: &str) -> io::Result<()> { + copy_into(&mut self.uname, name.as_bytes()).map_err(|err| { + io::Error::new( + err.kind(), + format!("{} when setting username for {}", err, self.path_lossy()), + ) + }) + } + + /// See `Header::groupname_bytes` + pub fn groupname_bytes(&self) -> &[u8] { + truncate(&self.gname) + } + + /// See `Header::set_groupname` + pub fn set_groupname(&mut self, name: &str) -> io::Result<()> { + copy_into(&mut self.gname, name.as_bytes()).map_err(|err| { + io::Error::new( + err.kind(), + format!("{} when setting groupname for {}", err, self.path_lossy()), + ) + }) + } + + /// See `Header::device_major` + pub fn device_major(&self) -> io::Result { + octal_from(&self.dev_major) + .map(|u| u as u32) + .map_err(|err| { + io::Error::new( + err.kind(), + format!( + "{} when getting device_major for {}", + err, + self.path_lossy() + ), + ) + }) + } + + /// See `Header::set_device_major` + pub fn set_device_major(&mut self, major: u32) { + octal_into(&mut self.dev_major, major); + } + + /// See `Header::device_minor` + pub fn device_minor(&self) -> io::Result { + octal_from(&self.dev_minor) + .map(|u| u as u32) + .map_err(|err| { + io::Error::new( + err.kind(), + format!( + "{} when getting device_minor for {}", + err, + self.path_lossy() + ), + ) + }) + } + + /// See `Header::set_device_minor` + pub fn set_device_minor(&mut self, minor: u32) { + octal_into(&mut self.dev_minor, minor); + } + + /// Views this as a normal `Header` + pub fn as_header(&self) -> &Header { + unsafe { cast(self) } + } + + /// Views this as a normal `Header` + pub fn as_header_mut(&mut self) -> &mut Header { + unsafe { cast_mut(self) } + } +} + +impl fmt::Debug for UstarHeader { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let mut f = f.debug_struct("UstarHeader"); + self.as_header().debug_fields(&mut f); + f.finish() + } +} + +impl GnuHeader { + /// See `Header::username_bytes` + pub fn username_bytes(&self) -> &[u8] { + truncate(&self.uname) + } + + /// Gets 
the fullname (group:user) in a "lossy" way, used for error reporting ONLY. + fn fullname_lossy(&self) -> String { + format!( + "{}:{}", + String::from_utf8_lossy(&self.groupname_bytes()), + String::from_utf8_lossy(&self.username_bytes()), + ) + } + + /// See `Header::set_username` + pub fn set_username(&mut self, name: &str) -> io::Result<()> { + copy_into(&mut self.uname, name.as_bytes()).map_err(|err| { + io::Error::new( + err.kind(), + format!( + "{} when setting username for {}", + err, + self.fullname_lossy() + ), + ) + }) + } + + /// See `Header::groupname_bytes` + pub fn groupname_bytes(&self) -> &[u8] { + truncate(&self.gname) + } + + /// See `Header::set_groupname` + pub fn set_groupname(&mut self, name: &str) -> io::Result<()> { + copy_into(&mut self.gname, name.as_bytes()).map_err(|err| { + io::Error::new( + err.kind(), + format!( + "{} when setting groupname for {}", + err, + self.fullname_lossy() + ), + ) + }) + } + + /// See `Header::device_major` + pub fn device_major(&self) -> io::Result { + octal_from(&self.dev_major) + .map(|u| u as u32) + .map_err(|err| { + io::Error::new( + err.kind(), + format!( + "{} when getting device_major for {}", + err, + self.fullname_lossy() + ), + ) + }) + } + + /// See `Header::set_device_major` + pub fn set_device_major(&mut self, major: u32) { + octal_into(&mut self.dev_major, major); + } + + /// See `Header::device_minor` + pub fn device_minor(&self) -> io::Result { + octal_from(&self.dev_minor) + .map(|u| u as u32) + .map_err(|err| { + io::Error::new( + err.kind(), + format!( + "{} when getting device_minor for {}", + err, + self.fullname_lossy() + ), + ) + }) + } + + /// See `Header::set_device_minor` + pub fn set_device_minor(&mut self, minor: u32) { + octal_into(&mut self.dev_minor, minor); + } + + /// Returns the last modification time in Unix time format + pub fn atime(&self) -> io::Result { + num_field_wrapper_from(&self.atime).map_err(|err| { + io::Error::new( + err.kind(), + format!("{} when getting atime 
for {}", err, self.fullname_lossy()), + ) + }) + } + + /// Encodes the `atime` provided into this header. + /// + /// Note that this time is typically a number of seconds passed since + /// January 1, 1970. + pub fn set_atime(&mut self, atime: u64) { + num_field_wrapper_into(&mut self.atime, atime); + } + + /// Returns the last modification time in Unix time format + pub fn ctime(&self) -> io::Result { + num_field_wrapper_from(&self.ctime).map_err(|err| { + io::Error::new( + err.kind(), + format!("{} when getting ctime for {}", err, self.fullname_lossy()), + ) + }) + } + + /// Encodes the `ctime` provided into this header. + /// + /// Note that this time is typically a number of seconds passed since + /// January 1, 1970. + pub fn set_ctime(&mut self, ctime: u64) { + num_field_wrapper_into(&mut self.ctime, ctime); + } + + /// Returns the "real size" of the file this header represents. + /// + /// This is applicable for sparse files where the returned size here is the + /// size of the entire file after the sparse regions have been filled in. + pub fn real_size(&self) -> io::Result { + octal_from(&self.realsize).map_err(|err| { + io::Error::new( + err.kind(), + format!( + "{} when getting real_size for {}", + err, + self.fullname_lossy() + ), + ) + }) + } + + /// Indicates whether this header will be followed by additional + /// sparse-header records. + /// + /// Note that this is handled internally by this library, and is likely only + /// interesting if a `raw` iterator is being used. 
+ pub fn is_extended(&self) -> bool { + self.isextended[0] == 1 + } + + /// Views this as a normal `Header` + pub fn as_header(&self) -> &Header { + unsafe { cast(self) } + } + + /// Views this as a normal `Header` + pub fn as_header_mut(&mut self) -> &mut Header { + unsafe { cast_mut(self) } + } +} + +impl fmt::Debug for GnuHeader { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let mut f = f.debug_struct("GnuHeader"); + self.as_header().debug_fields(&mut f); + if let Ok(atime) = self.atime() { + f.field("atime", &atime); + } + if let Ok(ctime) = self.ctime() { + f.field("ctime", &ctime); + } + f.field("is_extended", &self.is_extended()) + .field("sparse", &DebugSparseHeaders(&self.sparse)) + .finish() + } +} + +struct DebugSparseHeaders<'a>(&'a [GnuSparseHeader]); + +impl<'a> fmt::Debug for DebugSparseHeaders<'a> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let mut f = f.debug_list(); + for header in self.0 { + if !header.is_empty() { + f.entry(header); + } + } + f.finish() + } +} + +impl GnuSparseHeader { + /// Returns true if block is empty + pub fn is_empty(&self) -> bool { + self.offset[0] == 0 || self.numbytes[0] == 0 + } + + /// Offset of the block from the start of the file + /// + /// Returns `Err` for a malformed `offset` field. + pub fn offset(&self) -> io::Result { + octal_from(&self.offset).map_err(|err| { + io::Error::new( + err.kind(), + format!("{} when getting offset from sparse header", err), + ) + }) + } + + /// Length of the block + /// + /// Returns `Err` for a malformed `numbytes` field. 
pub fn length(&self) -> io::Result<u64> {
    match octal_from(&self.numbytes) {
        Ok(len) => Ok(len),
        Err(e) => Err(io::Error::new(
            e.kind(),
            format!("{} when getting length from sparse header", e),
        )),
    }
}
}

impl fmt::Debug for GnuSparseHeader {
    /// Prints `offset` and `length`, leaving out whichever fails to decode.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let mut out = f.debug_struct("GnuSparseHeader");
        if let Ok(offset) = self.offset() {
            out.field("offset", &offset);
        }
        if let Ok(length) = self.length() {
            out.field("length", &length);
        }
        out.finish()
    }
}

impl GnuExtSparseHeader {
    /// Creates a new zeroed-out sparse header entry.
    pub fn new() -> GnuExtSparseHeader {
        unsafe { mem::zeroed() }
    }

    /// Returns a view into this header as a byte array.
    pub fn as_bytes(&self) -> &[u8; 512] {
        debug_assert_eq!(mem::size_of_val(self), 512);
        let raw = self as *const GnuExtSparseHeader as *const [u8; 512];
        unsafe { &*raw }
    }

    /// Returns a mutable view into this header as a byte array.
    pub fn as_mut_bytes(&mut self) -> &mut [u8; 512] {
        debug_assert_eq!(mem::size_of_val(self), 512);
        let raw = self as *mut GnuExtSparseHeader as *mut [u8; 512];
        unsafe { &mut *raw }
    }

    /// Returns the underlying array of sparse headers.
    ///
    /// Some headers may represent empty chunks; see
    /// `GnuSparseHeader::is_empty`.
    pub fn sparse(&self) -> &[GnuSparseHeader; 21] {
        &self.sparse
    }

    /// Indicates if another sparse header should be following this one.
pub fn is_extended(&self) -> bool {
    let flag = self.isextended[0];
    flag == 1
}
}

impl Default for GnuExtSparseHeader {
    fn default() -> Self {
        GnuExtSparseHeader::new()
    }
}

/// Parses an octal ASCII numeric field.
///
/// The field is cut at its first nul byte and surrounding whitespace is
/// trimmed before parsing. Fails if the bytes are not utf-8 or not an octal
/// number.
fn octal_from(slice: &[u8]) -> io::Result<u64> {
    let trun = truncate(slice);
    let text = str::from_utf8(trun).map_err(|_| {
        other(&format!(
            "numeric field did not have utf-8 text: {}",
            String::from_utf8_lossy(trun)
        ))
    })?;
    u64::from_str_radix(text.trim(), 8)
        .map_err(|_| other(&format!("numeric field was not a number: {}", text)))
}

/// Writes `val` as zero-padded octal ASCII into `dst`, right-aligned and
/// leaving the last byte of `dst` untouched.
///
/// Digits that do not fit are silently dropped from the most significant end.
fn octal_into<T: fmt::Octal>(dst: &mut [u8], val: T) {
    let digits = format!("{:o}", val);
    // Least significant digit first, followed by endless zero padding.
    let padded = digits.bytes().rev().chain(repeat(b'0'));
    dst.iter_mut()
        .rev()
        .skip(1)
        .zip(padded)
        .for_each(|(slot, digit)| *slot = digit);
}

// Wrapper to figure out if we should fill the header field using tar's numeric
// extension (binary) or not (octal).
//
// The binary form is used from 8^11 upwards, and already from 8^7 upwards
// when the field is 8 bytes long.
fn num_field_wrapper_into(dst: &mut [u8], src: u64) {
    let needs_binary = src >= 8_589_934_592 || (src >= 2_097_152 && dst.len() == 8);
    if needs_binary {
        numeric_extended_into(dst, src)
    } else {
        octal_into(dst, src)
    }
}

// Wrapper to figure out if we should read the header field in binary (numeric
// extension) or octal (standard encoding): a set high bit in the first byte
// selects the binary form.
fn num_field_wrapper_from(src: &[u8]) -> io::Result<u64> {
    let is_binary = src[0] & 0x80 != 0;
    if !is_binary {
        return octal_from(src);
    }
    Ok(numeric_extended_from(src))
}

// When writing numeric fields in the extended form, the high bit of the
// first byte is set to 1 and the remainder of the field is treated as binary
// instead of octal ascii.
// This handles writing u64 to 8 (uid, gid) or 12 (size, *time) bytes array.
+fn numeric_extended_into(dst: &mut [u8], src: u64) {
+    let len: usize = dst.len(); // callers must pass at least 8 bytes (see `len - 8` below)
+    for (slot, val) in dst.iter_mut().zip(
+        repeat(0)
+            .take(len - 8) // zero-fill every byte ahead of the trailing 8 value bytes
+            .chain((0..8).rev().map(|x| ((src >> (8 * x)) & 0xff) as u8)),
+    ) {
+        *slot = val;
+    }
+    dst[0] |= 0x80;
+}
+
+fn numeric_extended_from(src: &[u8]) -> u64 {
+    let mut dst: u64 = 0;
+    let mut b_to_skip = 1;
+    if src.len() == 8 {
+        // 8-byte field: byte 0 carries value bits once the 0x80 flag bit is flipped off
+        dst = (src[0] ^ 0x80) as u64;
+    } else {
+        // any other width: skip the leading bytes and read only the last 8
+        b_to_skip = src.len() - 8;
+    }
+    for byte in src.iter().skip(b_to_skip) {
+        dst <<= 8;
+        dst |= *byte as u64;
+    }
+    dst
+}
+
+fn truncate(slice: &[u8]) -> &[u8] {
+    match slice.iter().position(|i| *i == 0) {
+        Some(i) => &slice[..i],
+        None => slice,
+    }
+}
+
+/// Copies `bytes` into `slot`, nul-terminating it when there is room; returns an error
+/// if `bytes` is longer than `slot` or if it contains any nul bytes.
+fn copy_into(slot: &mut [u8], bytes: &[u8]) -> io::Result<()> {
+    if bytes.len() > slot.len() {
+        Err(other("provided value is too long"))
+    } else if bytes.iter().any(|b| *b == 0) {
+        Err(other("provided value contains a nul byte"))
+    } else {
+        for (slot, val) in slot.iter_mut().zip(bytes.iter().chain(Some(&0))) {
+            *slot = *val;
+        }
+        Ok(())
+    }
+}
+
+/// Copies `path` into the `slot` provided
+///
+/// Returns an error if:
+///
+/// * the path is too long to fit
+/// * a nul byte was found
+/// * an invalid path component is encountered (e.g. a root path or parent dir)
+/// * the path itself is empty
+fn copy_path_into(mut slot: &mut [u8], path: &Path, is_link_name: bool) -> io::Result<()> {
+    let mut emitted = false;
+    let mut needs_slash = false;
+    for component in path.components() {
+        let bytes = path2bytes(Path::new(component.as_os_str()))?;
+        match (component, is_link_name) {
+            (Component::Prefix(..), false) | (Component::RootDir, false) => {
+                return Err(other("paths in archives must be relative"));
+            }
+            (Component::ParentDir, false) => {
+                return Err(other("paths in archives must not have `..`"));
+            }
+            // Allow "./" as the path
+            (Component::CurDir, false) if path.components().count() == 1 => {}
+            (Component::CurDir, false) => continue,
+            (Component::Normal(_), _) | (_, true) => {}
+        };
+        if needs_slash {
+            copy(&mut slot, b"/")?;
+        }
+        if bytes.contains(&b'/') {
+            if let Component::Normal(..) = component {
+                return Err(other("path component in archive cannot contain `/`"));
+            }
+        }
+        copy(&mut slot, &*bytes)?;
+        if &*bytes != b"/" {
+            needs_slash = true;
+        }
+        emitted = true;
+    }
+    if !emitted {
+        return Err(other("paths in archives must have at least one component"));
+    }
+    if ends_with_slash(path) {
+        copy(&mut slot, &[b'/'])?;
+    }
+    return Ok(());
+
+    fn copy(slot: &mut &mut [u8], bytes: &[u8]) -> io::Result<()> {
+        copy_into(*slot, bytes)?;
+        let tmp = mem::replace(slot, &mut []);
+        *slot = &mut tmp[bytes.len()..];
+        Ok(())
+    }
+}
+
+#[cfg(target_arch = "wasm32")]
+fn ends_with_slash(p: &Path) -> bool {
+    p.to_string_lossy().ends_with('/')
+}
+
+#[cfg(windows)]
+fn ends_with_slash(p: &Path) -> bool {
+    let last = p.as_os_str().encode_wide().last();
+    last == Some(b'/' as u16) || last == Some(b'\\' as u16)
+}
+
+#[cfg(any(unix, target_os = "redox"))]
+fn ends_with_slash(p: &Path) -> bool {
+    p.as_os_str().as_bytes().ends_with(&[b'/'])
+}
+
+#[cfg(any(windows, target_arch = "wasm32"))]
+pub fn path2bytes(p: &Path) -> io::Result<Cow<[u8]>> {
+    p.as_os_str()
+        .to_str()
+        .map(|s| s.as_bytes())
+        .ok_or_else(|| other(&format!("path {} was not valid Unicode", p.display())))
+        .map(|bytes| {
+            if bytes.contains(&b'\\') {
+                // Normalize to Unix-style path separators (allocates only when needed)
+                let mut bytes = bytes.to_owned();
+                for b in &mut bytes {
+                    if *b == b'\\' {
+                        *b = b'/';
+                    }
+                }
+                Cow::Owned(bytes)
+            } else {
+                Cow::Borrowed(bytes)
+            }
+        })
+}
+
+#[cfg(any(unix, target_os = "redox"))]
+/// Borrows the raw bytes of the path; on unix this will never fail
+pub fn path2bytes(p: &Path) -> io::Result<Cow<[u8]>> {
+    Ok(p.as_os_str().as_bytes()).map(Cow::Borrowed)
+}
+
+#[cfg(windows)]
+/// On windows we cannot accept non-Unicode bytes because it
+/// is impossible to convert it to UTF-16.
+pub fn bytes2path(bytes: Cow<[u8]>) -> io::Result<Cow<Path>> {
+    return match bytes {
+        Cow::Borrowed(bytes) => {
+            let s = str::from_utf8(bytes).map_err(|_| not_unicode(bytes))?;
+            Ok(Cow::Borrowed(Path::new(s)))
+        }
+        Cow::Owned(bytes) => {
+            let s = String::from_utf8(bytes).map_err(|uerr| not_unicode(&uerr.into_bytes()))?;
+            Ok(Cow::Owned(PathBuf::from(s)))
+        }
+    };
+
+    fn not_unicode(v: &[u8]) -> io::Error {
+        other(&format!(
+            "only Unicode paths are supported on Windows: {}",
+            String::from_utf8_lossy(v)
+        ))
+    }
+}
+
+#[cfg(any(unix, target_os = "redox"))]
+/// On unix this operation can never fail.
+pub fn bytes2path(bytes: Cow<'_, [u8]>) -> io::Result<Cow<'_, Path>> {
+    use std::ffi::{OsStr, OsString};
+
+    Ok(match bytes {
+        Cow::Borrowed(bytes) => Cow::Borrowed(Path::new(OsStr::from_bytes(bytes))),
+        Cow::Owned(bytes) => Cow::Owned(PathBuf::from(OsString::from_vec(bytes))),
+    })
+}
+
+#[cfg(target_arch = "wasm32")]
+pub fn bytes2path(bytes: Cow<[u8]>) -> io::Result<Cow<Path>> {
+    Ok(match bytes {
+        Cow::Borrowed(bytes) => {
+            Cow::Borrowed({ Path::new(str::from_utf8(bytes).map_err(invalid_utf8)?) })
+        }
+        Cow::Owned(bytes) => {
+            Cow::Owned({ PathBuf::from(String::from_utf8(bytes).map_err(invalid_utf8)?) })
+        }
+    })
+}
+
+#[cfg(target_arch = "wasm32")]
+fn invalid_utf8<T>(_: T) -> io::Error {
+    io::Error::new(io::ErrorKind::InvalidData, "Invalid utf-8")
+}
diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644
index 00000000..b22607d5
--- /dev/null
+++ b/src/lib.rs
@@ -0,0 +1,45 @@
+//! A library for reading and writing TAR archives in an async fashion.
+//!
+//! This library provides utilities necessary to manage [TAR archives][1]
+//! abstracted over a reader or writer. Great strides are taken to ensure that
+//! an archive is never required to be fully resident in memory, and all objects
+//! provide largely a streaming interface to read bytes from.
+//!
+//! [1]: http://en.wikipedia.org/wiki/Tar_%28computing%29
+
+// More docs about the detailed tar format can also be found here:
+// http://www.freebsd.org/cgi/man.cgi?query=tar&sektion=5&manpath=FreeBSD+8-current
+
+// NB: some of the coding patterns and idioms here may seem a little strange.
+//     This is currently attempting to expose a super generic interface while
+//     also not forcing clients to codegen the entire crate each time they use
+//     it. To that end lots of work is done to ensure that concrete
+//     implementations are all found in this crate and the generic functions are
+//     all just super thin wrappers (e.g. easy to codegen).
+ +#![deny(missing_docs)] + +use std::io::{Error, ErrorKind}; + +pub use crate::{ + archive::{Archive, ArchiveBuilder, Entries}, + builder::Builder, + entry::{Entry, Unpacked}, + entry_type::EntryType, + header::{ + GnuExtSparseHeader, GnuHeader, GnuSparseHeader, Header, HeaderMode, OldHeader, UstarHeader, + }, + pax::{PaxExtension, PaxExtensions}, +}; + +mod archive; +mod builder; +mod entry; +mod entry_type; +mod error; +mod header; +mod pax; + +fn other(msg: &str) -> Error { + Error::new(ErrorKind::Other, msg) +} diff --git a/src/pax.rs b/src/pax.rs new file mode 100644 index 00000000..0405899a --- /dev/null +++ b/src/pax.rs @@ -0,0 +1,88 @@ +use std::{slice, str}; + +use tokio::io; + +use crate::other; + +/// An iterator over the pax extensions in an archive entry. +/// +/// This iterator yields structures which can themselves be parsed into +/// key/value pairs. +pub struct PaxExtensions<'entry> { + data: slice::Split<'entry, u8, fn(&u8) -> bool>, +} + +/// A key/value pair corresponding to a pax extension. +pub struct PaxExtension<'entry> { + key: &'entry [u8], + value: &'entry [u8], +} + +pub fn pax_extensions(a: &[u8]) -> PaxExtensions { + PaxExtensions { + data: a.split(|a| *a == b'\n'), + } +} + +impl<'entry> Iterator for PaxExtensions<'entry> { + type Item = io::Result>; + + fn next(&mut self) -> Option>> { + let line = match self.data.next() { + Some(line) if line.is_empty() => return None, + Some(line) => line, + None => return None, + }; + + Some( + line.iter() + .position(|b| *b == b' ') + .and_then(|i| { + str::from_utf8(&line[..i]) + .ok() + .and_then(|len| len.parse::().ok().map(|j| (i + 1, j))) + }) + .and_then(|(kvstart, reported_len)| { + if line.len() + 1 == reported_len { + line[kvstart..] 
+ .iter() + .position(|b| *b == b'=') + .map(|equals| (kvstart, equals)) + } else { + None + } + }) + .map(|(kvstart, equals)| PaxExtension { + key: &line[kvstart..kvstart + equals], + value: &line[kvstart + equals + 1..], + }) + .ok_or_else(|| other("malformed pax extension")), + ) + } +} + +impl<'entry> PaxExtension<'entry> { + /// Returns the key for this key/value pair parsed as a string. + /// + /// May fail if the key isn't actually utf-8. + pub fn key(&self) -> Result<&'entry str, str::Utf8Error> { + str::from_utf8(self.key) + } + + /// Returns the underlying raw bytes for the key of this key/value pair. + pub fn key_bytes(&self) -> &'entry [u8] { + self.key + } + + /// Returns the value for this key/value pair parsed as a string. + /// + /// May fail if the value isn't actually utf-8. + pub fn value(&self) -> Result<&'entry str, str::Utf8Error> { + str::from_utf8(self.value) + } + + /// Returns the underlying raw bytes for this value of this key/value pair. + pub fn value_bytes(&self) -> &'entry [u8] { + self.value + } +} diff --git a/tests/all.rs b/tests/all.rs new file mode 100644 index 00000000..7fefb10a --- /dev/null +++ b/tests/all.rs @@ -0,0 +1,1117 @@ +extern crate tokio_tar as async_tar; + +extern crate filetime; +extern crate tempfile; +#[cfg(all(unix, feature = "xattr"))] +extern crate xattr; + +use std::{ + io::Cursor, + iter::repeat, + path::{Path, PathBuf}, +}; +use tokio::{ + fs::{self, File}, + io::{self, AsyncRead, AsyncReadExt, AsyncWrite, AsyncWriteExt}, +}; +use tokio_stream::*; + +use async_tar::{Archive, ArchiveBuilder, Builder, EntryType, Header}; +use filetime::FileTime; +use tempfile::{Builder as TempBuilder, TempDir}; + +macro_rules! t { + ($e:expr) => { + match $e { + Ok(v) => v, + Err(e) => panic!("{} returned {}", stringify!($e), e), + } + }; +} + +macro_rules! tar { + ($e:expr) => { + &include_bytes!(concat!("archives/", $e))[..] 
+ }; +} + +mod header; + +/// test that we can concatenate the simple.tar archive and extract the same entries twice when we +/// use the ignore_zeros option. +#[tokio::test] +async fn simple_concat() { + let bytes = tar!("simple.tar"); + let mut archive_bytes = Vec::new(); + archive_bytes.extend(bytes); + + let original_names: Vec = + decode_names(&mut Archive::new(Cursor::new(&archive_bytes))).await; + let expected: Vec<&str> = original_names.iter().map(|n| n.as_str()).collect(); + + // concat two archives (with null in-between); + archive_bytes.extend(bytes); + + // test now that when we read the archive, it stops processing at the first zero header. + let actual = decode_names(&mut Archive::new(Cursor::new(&archive_bytes))).await; + assert_eq!(expected, actual); + + // extend expected by itself. + let expected: Vec<&str> = { + let mut o = Vec::new(); + o.extend(&expected); + o.extend(&expected); + o + }; + + let builder = ArchiveBuilder::new(Cursor::new(&archive_bytes)).set_ignore_zeros(true); + let mut ar = builder.build(); + + let actual = decode_names(&mut ar).await; + assert_eq!(expected, actual); + + async fn decode_names(ar: &mut Archive) -> Vec + where + R: AsyncRead + Unpin + Sync + Send, + { + let mut names = Vec::new(); + let mut entries = t!(ar.entries()); + + while let Some(entry) = entries.next().await { + let e = t!(entry); + names.push(t!(::std::str::from_utf8(&e.path_bytes())).to_string()); + } + + names + } +} + +#[tokio::test] +async fn header_impls() { + let mut ar = Archive::new(Cursor::new(tar!("simple.tar"))); + let hn = Header::new_old(); + let hnb = hn.as_bytes(); + let mut entries = t!(ar.entries()); + while let Some(file) = entries.next().await { + let file = t!(file); + let h1 = file.header(); + let h1b = h1.as_bytes(); + let h2 = h1.clone(); + let h2b = h2.as_bytes(); + assert!(h1b[..] == h2b[..] && h2b[..] 
!= hnb[..]) + } +} + +#[tokio::test] +async fn header_impls_missing_last_header() { + let mut ar = Archive::new(Cursor::new(tar!("simple_missing_last_header.tar"))); + let hn = Header::new_old(); + let hnb = hn.as_bytes(); + let mut entries = t!(ar.entries()); + + while let Some(file) = entries.next().await { + let file = t!(file); + let h1 = file.header(); + let h1b = h1.as_bytes(); + let h2 = h1.clone(); + let h2b = h2.as_bytes(); + assert!(h1b[..] == h2b[..] && h2b[..] != hnb[..]) + } +} + +#[tokio::test] +async fn reading_files() { + let rdr = Cursor::new(tar!("reading_files.tar")); + let mut ar = Archive::new(rdr); + let mut entries = t!(ar.entries()); + + let mut a = t!(entries.next().await.unwrap()); + assert_eq!(&*a.header().path_bytes(), b"a"); + let mut s = String::new(); + t!(a.read_to_string(&mut s).await); + assert_eq!(s, "a\na\na\na\na\na\na\na\na\na\na\n"); + + let mut b = t!(entries.next().await.unwrap()); + assert_eq!(&*b.header().path_bytes(), b"b"); + s.truncate(0); + t!(b.read_to_string(&mut s).await); + assert_eq!(s, "b\nb\nb\nb\nb\nb\nb\nb\nb\nb\nb\n"); + + assert!(entries.next().await.is_none()); +} + +#[tokio::test] +async fn writing_files() { + let mut ar = Builder::new(Vec::new()); + let td = t!(TempBuilder::new().prefix("async-tar").tempdir()); + + let path = td.path().join("test"); + t!(t!(File::create(&path).await).write_all(b"test").await); + + t!(ar + .append_file("test2", &mut t!(File::open(&path).await)) + .await); + + let data = t!(ar.into_inner().await); + let mut ar = Archive::new(Cursor::new(data)); + let mut entries = t!(ar.entries()); + let mut f = t!(entries.next().await.unwrap()); + + assert_eq!(&*f.header().path_bytes(), b"test2"); + assert_eq!(f.header().size().unwrap(), 4); + let mut s = String::new(); + t!(f.read_to_string(&mut s).await); + assert_eq!(s, "test"); + + assert!(entries.next().await.is_none()); +} + +#[tokio::test] +async fn large_filename() { + let mut ar = Builder::new(Vec::new()); + let td = 
t!(TempBuilder::new().prefix("async-tar").tempdir()); + + let path = td.path().join("test"); + t!(t!(File::create(&path).await).write_all(b"test").await); + + let filename = repeat("abcd/").take(50).collect::(); + let mut header = Header::new_ustar(); + header.set_path(&filename).unwrap(); + header.set_metadata(&t!(fs::metadata(&path).await)); + header.set_cksum(); + t!(ar.append(&header, &b"test"[..]).await); + let too_long = repeat("abcd").take(200).collect::(); + t!(ar + .append_file(&too_long, &mut t!(File::open(&path).await)) + .await); + t!(ar.append_data(&mut header, &too_long, &b"test"[..]).await); + + let rd = Cursor::new(t!(ar.into_inner().await)); + let mut ar = Archive::new(rd); + let mut entries = t!(ar.entries()); + + // The short entry added with `append` + let mut f = entries.next().await.unwrap().unwrap(); + assert_eq!(&*f.header().path_bytes(), filename.as_bytes()); + assert_eq!(f.header().size().unwrap(), 4); + let mut s = String::new(); + t!(f.read_to_string(&mut s).await); + assert_eq!(s, "test"); + + // The long entry added with `append_file` + let mut f = entries.next().await.unwrap().unwrap(); + assert_eq!(&*f.path_bytes(), too_long.as_bytes()); + assert_eq!(f.header().size().unwrap(), 4); + let mut s = String::new(); + t!(f.read_to_string(&mut s).await); + assert_eq!(s, "test"); + + // The long entry added with `append_data` + let mut f = entries.next().await.unwrap().unwrap(); + assert!(f.header().path_bytes().len() < too_long.len()); + assert_eq!(&*f.path_bytes(), too_long.as_bytes()); + assert_eq!(f.header().size().unwrap(), 4); + let mut s = String::new(); + t!(f.read_to_string(&mut s).await); + assert_eq!(s, "test"); + + assert!(entries.next().await.is_none()); +} + +#[tokio::test] +async fn reading_entries() { + let rdr = Cursor::new(tar!("reading_files.tar")); + let mut ar = Archive::new(rdr); + let mut entries = t!(ar.entries()); + let mut a = t!(entries.next().await.unwrap()); + assert_eq!(&*a.header().path_bytes(), b"a"); + let 
mut s = String::new(); + t!(a.read_to_string(&mut s).await); + assert_eq!(s, "a\na\na\na\na\na\na\na\na\na\na\n"); + s.truncate(0); + t!(a.read_to_string(&mut s).await); + assert_eq!(s, ""); + let mut b = t!(entries.next().await.unwrap()); + + assert_eq!(&*b.header().path_bytes(), b"b"); + s.truncate(0); + t!(b.read_to_string(&mut s).await); + assert_eq!(s, "b\nb\nb\nb\nb\nb\nb\nb\nb\nb\nb\n"); + assert!(entries.next().await.is_none()); +} + +async fn check_dirtree(td: &TempDir) { + let dir_a = td.path().join("a"); + let dir_b = td.path().join("a/b"); + let file_c = td.path().join("a/c"); + assert!(fs::metadata(&dir_a) + .await + .map(|m| m.is_dir()) + .unwrap_or(false)); + assert!(fs::metadata(&dir_b) + .await + .map(|m| m.is_dir()) + .unwrap_or(false)); + assert!(fs::metadata(&file_c) + .await + .map(|m| m.is_file()) + .unwrap_or(false)); +} + +#[tokio::test] +async fn extracting_directories() { + let td = t!(TempBuilder::new().prefix("async-tar").tempdir()); + let rdr = Cursor::new(tar!("directory.tar")); + let mut ar = Archive::new(rdr); + t!(ar.unpack(td.path()).await); + check_dirtree(&td).await; +} + +#[tokio::test] +#[cfg(all(unix, feature = "xattr"))] +async fn xattrs() { + // If /tmp is a tmpfs, xattr will fail + // The xattr crate's unit tests also use /var/tmp for this reason + let td = t!(TempBuilder::new() + .prefix("async-tar") + .tempdir_in("/var/tmp")); + let rdr = Cursor::new(tar!("xattrs.tar")); + let builder = ArchiveBuilder::new(rdr).set_unpack_xattrs(true); + let mut ar = builder.build(); + t!(ar.unpack(td.path()).await); + + let val = xattr::get(td.path().join("a/b"), "user.pax.flags").unwrap(); + assert_eq!(val.unwrap(), b"epm"); +} + +#[tokio::test] +#[cfg(all(unix, feature = "xattr"))] +async fn no_xattrs() { + // If /tmp is a tmpfs, xattr will fail + // The xattr crate's unit tests also use /var/tmp for this reason + let td = t!(TempBuilder::new() + .prefix("async-tar") + .tempdir_in("/var/tmp")); + let rdr = 
Cursor::new(tar!("xattrs.tar")); + let builder = ArchiveBuilder::new(rdr).set_unpack_xattrs(false); + let mut ar = builder.build(); + t!(ar.unpack(td.path()).await); + + assert_eq!( + xattr::get(td.path().join("a/b"), "user.pax.flags").unwrap(), + None + ); +} + +#[tokio::test] +async fn writing_and_extracting_directories() { + let td = t!(TempBuilder::new().prefix("async-tar").tempdir()); + + let mut ar = Builder::new(Vec::new()); + let tmppath = td.path().join("tmpfile"); + t!(t!(File::create(&tmppath).await).write_all(b"c").await); + t!(ar.append_dir("a", ".").await); + t!(ar.append_dir("a/b", ".").await); + t!(ar + .append_file("a/c", &mut t!(File::open(&tmppath).await)) + .await); + t!(ar.finish().await); + + let rdr = Cursor::new(t!(ar.into_inner().await)); + let mut ar = Archive::new(rdr); + t!(ar.unpack(td.path()).await); + check_dirtree(&td).await; +} + +#[tokio::test] +async fn writing_directories_recursively() { + let td = t!(TempBuilder::new().prefix("async-tar").tempdir()); + + let base_dir = td.path().join("base"); + t!(fs::create_dir(&base_dir).await); + t!(t!(File::create(base_dir.join("file1")).await) + .write_all(b"file1") + .await); + let sub_dir = base_dir.join("sub"); + t!(fs::create_dir(&sub_dir).await); + t!(t!(File::create(sub_dir.join("file2")).await) + .write_all(b"file2") + .await); + + let mut ar = Builder::new(Vec::new()); + t!(ar.append_dir_all("foobar", base_dir).await); + let data = t!(ar.into_inner().await); + + let mut ar = Archive::new(Cursor::new(data)); + t!(ar.unpack(td.path()).await); + let base_dir = td.path().join("foobar"); + assert!(fs::metadata(&base_dir) + .await + .map(|m| m.is_dir()) + .unwrap_or(false)); + let file1_path = base_dir.join("file1"); + assert!(fs::metadata(&file1_path) + .await + .map(|m| m.is_file()) + .unwrap_or(false)); + let sub_dir = base_dir.join("sub"); + assert!(fs::metadata(&sub_dir) + .await + .map(|m| m.is_dir()) + .unwrap_or(false)); + let file2_path = sub_dir.join("file2"); + 
assert!(fs::metadata(&file2_path) + .await + .map(|m| m.is_file()) + .unwrap_or(false)); +} + +#[tokio::test] +async fn append_dir_all_blank_dest() { + let td = t!(TempBuilder::new().prefix("async-tar").tempdir()); + + let base_dir = td.path().join("base"); + t!(fs::create_dir(&base_dir).await); + t!(t!(File::create(base_dir.join("file1")).await) + .write_all(b"file1") + .await); + let sub_dir = base_dir.join("sub"); + t!(fs::create_dir(&sub_dir).await); + t!(t!(File::create(sub_dir.join("file2")).await) + .write_all(b"file2") + .await); + + let mut ar = Builder::new(Vec::new()); + t!(ar.append_dir_all("", base_dir).await); + let data = t!(ar.into_inner().await); + + let mut ar = Archive::new(Cursor::new(data)); + t!(ar.unpack(td.path()).await); + let base_dir = td.path(); + assert!(fs::metadata(&base_dir) + .await + .map(|m| m.is_dir()) + .unwrap_or(false)); + let file1_path = base_dir.join("file1"); + assert!(fs::metadata(&file1_path) + .await + .map(|m| m.is_file()) + .unwrap_or(false)); + let sub_dir = base_dir.join("sub"); + assert!(fs::metadata(&sub_dir) + .await + .map(|m| m.is_dir()) + .unwrap_or(false)); + let file2_path = sub_dir.join("file2"); + assert!(fs::metadata(&file2_path) + .await + .map(|m| m.is_file()) + .unwrap_or(false)); +} + +#[tokio::test] +async fn append_dir_all_does_not_work_on_non_directory() { + let td = t!(TempBuilder::new().prefix("async-tar").tempdir()); + let path = td.path().join("test"); + t!(t!(File::create(&path).await).write_all(b"test").await); + + let mut ar = Builder::new(Vec::new()); + let result = ar.append_dir_all("test", path).await; + assert!(result.is_err()); +} + +#[tokio::test] +async fn extracting_duplicate_dirs() { + let td = t!(TempBuilder::new().prefix("async-tar").tempdir()); + let rdr = Cursor::new(tar!("duplicate_dirs.tar")); + let mut ar = Archive::new(rdr); + t!(ar.unpack(td.path()).await); + + let some_dir = td.path().join("some_dir"); + assert!(fs::metadata(&some_dir) + .await + .map(|m| m.is_dir()) + 
.unwrap_or(false)); +} + +#[tokio::test] +async fn unpack_old_style_bsd_dir() { + let td = t!(TempBuilder::new().prefix("async-tar").tempdir()); + + let mut ar = Builder::new(Vec::new()); + + let mut header = Header::new_old(); + header.set_entry_type(EntryType::Regular); + t!(header.set_path("testdir/")); + header.set_size(0); + header.set_cksum(); + t!(ar.append(&header, &mut io::empty()).await); + + // Extracting + let rdr = Cursor::new(t!(ar.into_inner().await)); + let mut ar = Archive::new(rdr); + t!(ar.unpack(td.path()).await); + + // Iterating + let rdr = Cursor::new(ar.into_inner().map_err(|_| ()).unwrap().into_inner()); + let mut ar = Archive::new(rdr); + let mut entries = t!(ar.entries()); + + while let Some(e) = entries.next().await { + assert!(e.is_ok()); + } + + assert!(td.path().join("testdir").is_dir()); +} + +#[tokio::test] +async fn handling_incorrect_file_size() { + let td = t!(TempBuilder::new().prefix("async-tar").tempdir()); + + let mut ar = Builder::new(Vec::new()); + + let path = td.path().join("tmpfile"); + t!(File::create(&path).await); + let mut file = t!(File::open(&path).await); + let mut header = Header::new_old(); + t!(header.set_path("somepath")); + header.set_metadata(&t!(file.metadata().await)); + header.set_size(2048); // past the end of file null blocks + header.set_cksum(); + t!(ar.append(&header, &mut file).await); + + // Extracting + let rdr = Cursor::new(t!(ar.into_inner().await)); + let mut ar = Archive::new(rdr); + assert!(ar.unpack(td.path()).await.is_err()); + + // Iterating + let rdr = Cursor::new(ar.into_inner().map_err(|_| ()).unwrap().into_inner()); + let mut ar = Archive::new(rdr); + let mut entries = t!(ar.entries()); + while let Some(fr) = entries.next().await { + if fr.is_err() { + return; + } + } + panic!("Should have errorred"); +} + +#[tokio::test] +async fn extracting_malicious_tarball() { + let td = t!(TempBuilder::new().prefix("async-tar").tempdir()); + + let mut evil_tar = Vec::new(); + + evil_tar = { + let 
mut a = Builder::new(evil_tar); + async fn append(a: &mut Builder, path: &'static str) { + let mut header = Header::new_gnu(); + assert!(header.set_path(path).is_err(), "was ok: {:?}", path); + { + let h = header.as_gnu_mut().unwrap(); + for (a, b) in h.name.iter_mut().zip(path.as_bytes()) { + *a = *b; + } + } + header.set_size(1); + header.set_cksum(); + t!(a.append(&header, io::repeat(1).take(1)).await); + } + + append(&mut a, "/tmp/abs_evil.txt").await; + append(&mut a, "//tmp/abs_evil2.txt").await; + append(&mut a, "///tmp/abs_evil3.txt").await; + append(&mut a, "/./tmp/abs_evil4.txt").await; + append(&mut a, "//./tmp/abs_evil5.txt").await; + append(&mut a, "///./tmp/abs_evil6.txt").await; + append(&mut a, "/../tmp/rel_evil.txt").await; + append(&mut a, "../rel_evil2.txt").await; + append(&mut a, "./../rel_evil3.txt").await; + append(&mut a, "some/../../rel_evil4.txt").await; + append(&mut a, "").await; + append(&mut a, "././//./..").await; + append(&mut a, "..").await; + append(&mut a, "/////////..").await; + append(&mut a, "/////////").await; + a.into_inner().await.unwrap() + }; + + let mut ar = Archive::new(&evil_tar[..]); + t!(ar.unpack(td.path()).await); + + assert!(fs::metadata("/tmp/abs_evil.txt").await.is_err()); + assert!(fs::metadata("/tmp/abs_evil.txt2").await.is_err()); + assert!(fs::metadata("/tmp/abs_evil.txt3").await.is_err()); + assert!(fs::metadata("/tmp/abs_evil.txt4").await.is_err()); + assert!(fs::metadata("/tmp/abs_evil.txt5").await.is_err()); + assert!(fs::metadata("/tmp/abs_evil.txt6").await.is_err()); + assert!(fs::metadata("/tmp/rel_evil.txt").await.is_err()); + assert!(fs::metadata("/tmp/rel_evil.txt").await.is_err()); + assert!(fs::metadata(td.path().join("../tmp/rel_evil.txt")) + .await + .is_err()); + assert!(fs::metadata(td.path().join("../rel_evil2.txt")) + .await + .is_err()); + assert!(fs::metadata(td.path().join("../rel_evil3.txt")) + .await + .is_err()); + assert!(fs::metadata(td.path().join("../rel_evil4.txt")) + .await + 
.is_err()); + + // The `some` subdirectory should not be created because the only + // filename that references this has '..'. + assert!(fs::metadata(td.path().join("some")).await.is_err()); + + // The `tmp` subdirectory should be created and within this + // subdirectory, there should be files named `abs_evil.txt` through + // `abs_evil6.txt`. + assert!(fs::metadata(td.path().join("tmp")) + .await + .map(|m| m.is_dir()) + .unwrap_or(false)); + assert!(fs::metadata(td.path().join("tmp/abs_evil.txt")) + .await + .map(|m| m.is_file()) + .unwrap_or(false)); + assert!(fs::metadata(td.path().join("tmp/abs_evil2.txt")) + .await + .map(|m| m.is_file()) + .unwrap_or(false)); + assert!(fs::metadata(td.path().join("tmp/abs_evil3.txt")) + .await + .map(|m| m.is_file()) + .unwrap_or(false)); + assert!(fs::metadata(td.path().join("tmp/abs_evil4.txt")) + .await + .map(|m| m.is_file()) + .unwrap_or(false)); + assert!(fs::metadata(td.path().join("tmp/abs_evil5.txt")) + .await + .map(|m| m.is_file()) + .unwrap_or(false)); + assert!(fs::metadata(td.path().join("tmp/abs_evil6.txt")) + .await + .map(|m| m.is_file()) + .unwrap_or(false)); +} + +#[tokio::test] +async fn octal_spaces() { + let rdr = Cursor::new(tar!("spaces.tar")); + let mut ar = Archive::new(rdr); + + let entry = ar.entries().unwrap().next().await.unwrap().unwrap(); + assert_eq!(entry.header().mode().unwrap() & 0o777, 0o777); + assert_eq!(entry.header().uid().unwrap(), 0); + assert_eq!(entry.header().gid().unwrap(), 0); + assert_eq!(entry.header().size().unwrap(), 2); + assert_eq!(entry.header().mtime().unwrap(), 0o12_440_016_664); + assert_eq!(entry.header().cksum().unwrap(), 0o4253); +} + +#[tokio::test] +async fn extracting_malformed_tar_null_blocks() { + let td = t!(TempBuilder::new().prefix("async-tar").tempdir()); + + let mut ar = Builder::new(Vec::new()); + + let path1 = td.path().join("tmpfile1"); + let path2 = td.path().join("tmpfile2"); + t!(File::create(&path1).await); + t!(File::create(&path2).await); + 
t!(ar + .append_file("tmpfile1", &mut t!(File::open(&path1).await)) + .await); + let mut data = t!(ar.into_inner().await); + let amt = data.len(); + data.truncate(amt - 512); + let mut ar = Builder::new(data); + t!(ar + .append_file("tmpfile2", &mut t!(File::open(&path2).await)) + .await); + t!(ar.finish().await); + + let data = t!(ar.into_inner().await); + let mut ar = Archive::new(&data[..]); + assert!(ar.unpack(td.path()).await.is_ok()); +} + +#[tokio::test] +async fn empty_filename() { + let td = t!(TempBuilder::new().prefix("async-tar").tempdir()); + let rdr = Cursor::new(tar!("empty_filename.tar")); + let mut ar = Archive::new(rdr); + assert!(ar.unpack(td.path()).await.is_ok()); +} + +#[tokio::test] +async fn file_times() { + let td = t!(TempBuilder::new().prefix("async-tar").tempdir()); + let rdr = Cursor::new(tar!("file_times.tar")); + let mut ar = Archive::new(rdr); + t!(ar.unpack(td.path()).await); + + let meta = fs::metadata(td.path().join("a")).await.unwrap(); + let mtime = FileTime::from_last_modification_time(&meta); + let atime = FileTime::from_last_access_time(&meta); + assert_eq!(mtime.unix_seconds(), 1_000_000_000); + assert_eq!(mtime.nanoseconds(), 0); + assert_eq!(atime.unix_seconds(), 1_000_000_000); + assert_eq!(atime.nanoseconds(), 0); +} + +#[tokio::test] +async fn backslash_treated_well() { + // Insert a file into an archive with a backslash + let td = t!(TempBuilder::new().prefix("async-tar").tempdir()); + let mut ar = Builder::new(Vec::::new()); + t!(ar.append_dir("foo\\bar", td.path()).await); + let mut ar = Archive::new(Cursor::new(t!(ar.into_inner().await))); + let f = t!(t!(ar.entries()).next().await.unwrap()); + if cfg!(unix) { + assert_eq!(t!(f.header().path()).to_str(), Some("foo\\bar")); + } else { + assert_eq!(t!(f.header().path()).to_str(), Some("foo/bar")); + } + + // Unpack an archive with a backslash in the name + let mut ar = Builder::new(Vec::::new()); + let mut header = Header::new_gnu(); + 
header.set_metadata(&t!(fs::metadata(td.path()).await)); + header.set_size(0); + for (a, b) in header.as_old_mut().name.iter_mut().zip(b"foo\\bar\x00") { + *a = *b; + } + header.set_cksum(); + t!(ar.append(&header, &mut io::empty()).await); + let data = t!(ar.into_inner().await); + let mut ar = Archive::new(&data[..]); + let f = t!(t!(ar.entries()).next().await.unwrap()); + assert_eq!(t!(f.header().path()).to_str(), Some("foo\\bar")); + + let mut ar = Archive::new(&data[..]); + t!(ar.unpack(td.path()).await); + assert!(fs::metadata(td.path().join("foo\\bar")).await.is_ok()); +} + +#[cfg(unix)] +#[tokio::test] +async fn nul_bytes_in_path() { + use std::{ffi::OsStr, os::unix::prelude::*}; + + let nul_path = OsStr::from_bytes(b"foo\0"); + let td = t!(TempBuilder::new().prefix("async-tar").tempdir()); + let mut ar = Builder::new(Vec::::new()); + let err = ar.append_dir(nul_path, td.path()).await.unwrap_err(); + assert!(err.to_string().contains("contains a nul byte")); +} + +#[tokio::test] +async fn links() { + let mut ar = Archive::new(Cursor::new(tar!("link.tar"))); + let mut entries = t!(ar.entries()); + let link = t!(entries.next().await.unwrap()); + assert_eq!( + t!(link.header().link_name()).as_ref().map(|p| &**p), + Some(Path::new("file")) + ); + let other = t!(entries.next().await.unwrap()); + assert!(t!(other.header().link_name()).is_none()); +} + +#[tokio::test] +#[cfg(unix)] // making symlinks on windows is hard +async fn unpack_links() { + let td = t!(TempBuilder::new().prefix("async-tar").tempdir()); + let mut ar = Archive::new(Cursor::new(tar!("link.tar"))); + t!(ar.unpack(td.path()).await); + + let md = t!(fs::symlink_metadata(td.path().join("lnk")).await); + assert!(md.file_type().is_symlink()); + assert_eq!( + &*t!(fs::read_link(td.path().join("lnk")).await), + Path::new("file") + ); + t!(File::open(td.path().join("lnk")).await); +} + +#[tokio::test] +async fn pax_simple() { + let mut ar = Archive::new(tar!("pax.tar")); + let mut entries = 
t!(ar.entries()); + + let mut first = t!(entries.next().await.unwrap()); + let mut attributes = t!(first.pax_extensions().await).unwrap(); + let first = t!(attributes.next().unwrap()); + let second = t!(attributes.next().unwrap()); + let third = t!(attributes.next().unwrap()); + assert!(attributes.next().is_none()); + + assert_eq!(first.key(), Ok("mtime")); + assert_eq!(first.value(), Ok("1453146164.953123768")); + assert_eq!(second.key(), Ok("atime")); + assert_eq!(second.value(), Ok("1453251915.24892486")); + assert_eq!(third.key(), Ok("ctime")); + assert_eq!(third.value(), Ok("1453146164.953123768")); +} + +#[tokio::test] +async fn pax_path() { + let mut ar = Archive::new(tar!("pax2.tar")); + let mut entries = t!(ar.entries()); + + let first = t!(entries.next().await.unwrap()); + assert!(first.path().unwrap().ends_with("aaaaaaaaaaaaaaa")); +} + +#[tokio::test] +async fn long_name_trailing_nul() { + let mut b = Builder::new(Vec::::new()); + + let mut h = Header::new_gnu(); + t!(h.set_path("././@LongLink")); + h.set_size(4); + h.set_entry_type(EntryType::new(b'L')); + h.set_cksum(); + t!(b.append(&h, b"foo\0" as &[u8]).await); + let mut h = Header::new_gnu(); + + t!(h.set_path("bar")); + h.set_size(6); + h.set_entry_type(EntryType::file()); + h.set_cksum(); + t!(b.append(&h, b"foobar" as &[u8]).await); + + let contents = t!(b.into_inner().await); + let mut a = Archive::new(&contents[..]); + + let e = t!(t!(a.entries()).next().await.unwrap()); + assert_eq!(&*e.path_bytes(), b"foo"); +} + +#[tokio::test] +async fn long_linkname_trailing_nul() { + let mut b = Builder::new(Vec::::new()); + + let mut h = Header::new_gnu(); + t!(h.set_path("././@LongLink")); + h.set_size(4); + h.set_entry_type(EntryType::new(b'K')); + h.set_cksum(); + t!(b.append(&h, b"foo\0" as &[u8]).await); + let mut h = Header::new_gnu(); + + t!(h.set_path("bar")); + h.set_size(6); + h.set_entry_type(EntryType::file()); + h.set_cksum(); + t!(b.append(&h, b"foobar" as &[u8]).await); + + let contents 
= t!(b.into_inner().await); + let mut a = Archive::new(&contents[..]); + + let e = t!(t!(a.entries()).next().await.unwrap()); + assert_eq!(&*e.link_name_bytes().unwrap(), b"foo"); +} + +#[tokio::test] +async fn encoded_long_name_has_trailing_nul() { + let td = t!(TempBuilder::new().prefix("async-tar").tempdir()); + let path = td.path().join("foo"); + t!(t!(File::create(&path).await).write_all(b"test").await); + + let mut b = Builder::new(Vec::::new()); + let long = repeat("abcd").take(200).collect::(); + + t!(b.append_file(&long, &mut t!(File::open(&path).await)).await); + + let contents = t!(b.into_inner().await); + let mut a = Archive::new(&contents[..]); + + let mut e = t!(t!(a.entries_raw()).next().await.unwrap()); + let mut name = Vec::new(); + t!(e.read_to_end(&mut name).await); + assert_eq!(name[name.len() - 1], 0); + + let header_name = &e.header().as_gnu().unwrap().name; + assert!(header_name.starts_with(b"././@LongLink\x00")); +} + +#[tokio::test] +async fn reading_sparse() { + let rdr = Cursor::new(tar!("sparse.tar")); + let mut ar = Archive::new(rdr); + let mut entries = t!(ar.entries()); + + let mut a = t!(entries.next().await.unwrap()); + let mut s = String::new(); + assert_eq!(&*a.header().path_bytes(), b"sparse_begin.txt"); + t!(a.read_to_string(&mut s).await); + assert_eq!(&s[..5], "test\n"); + assert!(s[5..].chars().all(|x| x == '\u{0}')); + + let mut a = t!(entries.next().await.unwrap()); + let mut s = String::new(); + assert_eq!(&*a.header().path_bytes(), b"sparse_end.txt"); + t!(a.read_to_string(&mut s).await); + assert!(s[..s.len() - 9].chars().all(|x| x == '\u{0}')); + assert_eq!(&s[s.len() - 9..], "test_end\n"); + + let mut a = t!(entries.next().await.unwrap()); + let mut s = String::new(); + assert_eq!(&*a.header().path_bytes(), b"sparse_ext.txt"); + t!(a.read_to_string(&mut s).await); + assert!(s[..0x1000].chars().all(|x| x == '\u{0}')); + assert_eq!(&s[0x1000..0x1000 + 5], "text\n"); + assert!(s[0x1000 + 5..0x3000].chars().all(|x| x == 
'\u{0}')); + assert_eq!(&s[0x3000..0x3000 + 5], "text\n"); + assert!(s[0x3000 + 5..0x5000].chars().all(|x| x == '\u{0}')); + assert_eq!(&s[0x5000..0x5000 + 5], "text\n"); + assert!(s[0x5000 + 5..0x7000].chars().all(|x| x == '\u{0}')); + assert_eq!(&s[0x7000..0x7000 + 5], "text\n"); + assert!(s[0x7000 + 5..0x9000].chars().all(|x| x == '\u{0}')); + assert_eq!(&s[0x9000..0x9000 + 5], "text\n"); + assert!(s[0x9000 + 5..0xb000].chars().all(|x| x == '\u{0}')); + assert_eq!(&s[0xb000..0xb000 + 5], "text\n"); + + let mut a = t!(entries.next().await.unwrap()); + let mut s = String::new(); + assert_eq!(&*a.header().path_bytes(), b"sparse.txt"); + t!(a.read_to_string(&mut s).await); + assert!(s[..0x1000].chars().all(|x| x == '\u{0}')); + assert_eq!(&s[0x1000..0x1000 + 6], "hello\n"); + assert!(s[0x1000 + 6..0x2fa0].chars().all(|x| x == '\u{0}')); + assert_eq!(&s[0x2fa0..0x2fa0 + 6], "world\n"); + assert!(s[0x2fa0 + 6..0x4000].chars().all(|x| x == '\u{0}')); + + assert!(entries.next().await.is_none()); +} + +#[tokio::test] +async fn extract_sparse() { + let rdr = Cursor::new(tar!("sparse.tar")); + let mut ar = Archive::new(rdr); + let td = t!(TempBuilder::new().prefix("async-tar").tempdir()); + t!(ar.unpack(td.path()).await); + + let mut s = String::new(); + t!(t!(File::open(td.path().join("sparse_begin.txt")).await) + .read_to_string(&mut s) + .await); + assert_eq!(&s[..5], "test\n"); + assert!(s[5..].chars().all(|x| x == '\u{0}')); + + s.truncate(0); + t!(t!(File::open(td.path().join("sparse_end.txt")).await) + .read_to_string(&mut s) + .await); + assert!(s[..s.len() - 9].chars().all(|x| x == '\u{0}')); + assert_eq!(&s[s.len() - 9..], "test_end\n"); + + s.truncate(0); + t!(t!(File::open(td.path().join("sparse_ext.txt")).await) + .read_to_string(&mut s) + .await); + assert!(s[..0x1000].chars().all(|x| x == '\u{0}')); + assert_eq!(&s[0x1000..0x1000 + 5], "text\n"); + assert!(s[0x1000 + 5..0x3000].chars().all(|x| x == '\u{0}')); + assert_eq!(&s[0x3000..0x3000 + 5], "text\n"); + 
assert!(s[0x3000 + 5..0x5000].chars().all(|x| x == '\u{0}')); + assert_eq!(&s[0x5000..0x5000 + 5], "text\n"); + assert!(s[0x5000 + 5..0x7000].chars().all(|x| x == '\u{0}')); + assert_eq!(&s[0x7000..0x7000 + 5], "text\n"); + assert!(s[0x7000 + 5..0x9000].chars().all(|x| x == '\u{0}')); + assert_eq!(&s[0x9000..0x9000 + 5], "text\n"); + assert!(s[0x9000 + 5..0xb000].chars().all(|x| x == '\u{0}')); + assert_eq!(&s[0xb000..0xb000 + 5], "text\n"); + + s.truncate(0); + t!(t!(File::open(td.path().join("sparse.txt")).await) + .read_to_string(&mut s) + .await); + assert!(s[..0x1000].chars().all(|x| x == '\u{0}')); + assert_eq!(&s[0x1000..0x1000 + 6], "hello\n"); + assert!(s[0x1000 + 6..0x2fa0].chars().all(|x| x == '\u{0}')); + assert_eq!(&s[0x2fa0..0x2fa0 + 6], "world\n"); + assert!(s[0x2fa0 + 6..0x4000].chars().all(|x| x == '\u{0}')); +} + +#[tokio::test] +async fn path_separators() { + let mut ar = Builder::new(Vec::new()); + let td = t!(TempBuilder::new().prefix("async-tar").tempdir()); + + let path = td.path().join("test"); + t!(t!(File::create(&path).await).write_all(b"test").await); + + let short_path: PathBuf = repeat("abcd").take(2).collect(); + let long_path: PathBuf = repeat("abcd").take(50).collect(); + + // Make sure UStar headers normalize to Unix path separators + let mut header = Header::new_ustar(); + + t!(header.set_path(&short_path)); + assert_eq!(t!(header.path()), short_path); + assert!(!header.path_bytes().contains(&b'\\')); + + t!(header.set_path(&long_path)); + assert_eq!(t!(header.path()), long_path); + assert!(!header.path_bytes().contains(&b'\\')); + + // Make sure GNU headers normalize to Unix path separators, + // including the `@LongLink` fallback used by `append_file`. 
+ t!(ar + .append_file(&short_path, &mut t!(File::open(&path).await)) + .await); + t!(ar + .append_file(&long_path, &mut t!(File::open(&path).await)) + .await); + + let rd = Cursor::new(t!(ar.into_inner().await)); + let mut ar = Archive::new(rd); + let mut entries = t!(ar.entries()); + + let entry = t!(entries.next().await.unwrap()); + assert_eq!(t!(entry.path()), short_path); + assert!(!entry.path_bytes().contains(&b'\\')); + + let entry = t!(entries.next().await.unwrap()); + assert_eq!(t!(entry.path()), long_path); + assert!(!entry.path_bytes().contains(&b'\\')); + + assert!(entries.next().await.is_none()); +} + +#[tokio::test] +#[cfg(unix)] +async fn append_path_symlink() { + use std::{borrow::Cow, env, os::unix::fs::symlink}; + + let mut ar = Builder::new(Vec::new()); + ar.follow_symlinks(false); + let td = t!(TempBuilder::new().prefix("async-tar").tempdir()); + + let long_linkname = repeat("abcd").take(30).collect::(); + let long_pathname = repeat("dcba").take(30).collect::(); + t!(env::set_current_dir(td.path())); + // "short" path name / short link name + t!(symlink("testdest", "test")); + t!(ar.append_path("test").await); + // short path name / long link name + t!(symlink(&long_linkname, "test2")); + t!(ar.append_path("test2").await); + // long path name / long link name + t!(symlink(&long_linkname, &long_pathname)); + t!(ar.append_path(&long_pathname).await); + + let rd = Cursor::new(t!(ar.into_inner().await)); + let mut ar = Archive::new(rd); + let mut entries = t!(ar.entries()); + + let entry = t!(entries.next().await.unwrap()); + assert_eq!(t!(entry.path()), Path::new("test")); + assert_eq!( + t!(entry.link_name()), + Some(Cow::from(Path::new("testdest"))) + ); + assert_eq!(t!(entry.header().size()), 0); + + let entry = t!(entries.next().await.unwrap()); + assert_eq!(t!(entry.path()), Path::new("test2")); + assert_eq!( + t!(entry.link_name()), + Some(Cow::from(Path::new(&long_linkname))) + ); + assert_eq!(t!(entry.header().size()), 0); + + let entry = 
t!(entries.next().await.unwrap()); + assert_eq!(t!(entry.path()), Path::new(&long_pathname)); + assert_eq!( + t!(entry.link_name()), + Some(Cow::from(Path::new(&long_linkname))) + ); + assert_eq!(t!(entry.header().size()), 0); + + assert!(entries.next().await.is_none()); +} + +#[tokio::test] +async fn name_with_slash_doesnt_fool_long_link_and_bsd_compat() { + let td = t!(TempBuilder::new().prefix("async-tar").tempdir()); + + let mut ar = Builder::new(Vec::new()); + + let mut h = Header::new_gnu(); + t!(h.set_path("././@LongLink")); + h.set_size(4); + h.set_entry_type(EntryType::new(b'L')); + h.set_cksum(); + t!(ar.append(&h, b"foo\0" as &[u8]).await); + + let mut header = Header::new_gnu(); + header.set_entry_type(EntryType::Regular); + t!(header.set_path("testdir/")); + header.set_size(0); + header.set_cksum(); + t!(ar.append(&header, &mut io::empty()).await); + + // Extracting + let rdr = Cursor::new(t!(ar.into_inner().await)); + let mut ar = Archive::new(rdr); + t!(ar.unpack(td.path()).await); + + // Iterating + let rdr = Cursor::new(ar.into_inner().map_err(|_| ()).unwrap().into_inner()); + let mut ar = Archive::new(rdr); + let mut entries = t!(ar.entries()); + while let Some(entry) = entries.next().await { + assert!(entry.is_ok()); + } + + assert!(td.path().join("foo").is_file()); +} + +#[tokio::test] +async fn insert_local_file_different_name() { + let mut ar = Builder::new(Vec::new()); + let td = t!(TempBuilder::new().prefix("async-tar").tempdir()); + let path = td.path().join("directory"); + t!(fs::create_dir(&path).await); + ar.append_path_with_name(&path, "archive/dir") + .await + .unwrap(); + let path = td.path().join("file"); + t!(t!(File::create(&path).await).write_all(b"test").await); + ar.append_path_with_name(&path, "archive/dir/f") + .await + .unwrap(); + + let rd = Cursor::new(t!(ar.into_inner().await)); + let mut ar = Archive::new(rd); + let mut entries = t!(ar.entries()); + let entry = t!(entries.next().await.unwrap()); + 
assert_eq!(t!(entry.path()), Path::new("archive/dir")); + let entry = t!(entries.next().await.unwrap()); + assert_eq!(t!(entry.path()), Path::new("archive/dir/f")); + assert!(entries.next().await.is_none()); +} + +#[tokio::test] +#[cfg(unix)] +async fn tar_directory_containing_symlink_to_directory() { + use std::os::unix::fs::symlink; + + let td = t!(TempBuilder::new().prefix("async-tar").tempdir()); + let dummy_src = t!(TempBuilder::new().prefix("dummy_src").tempdir()); + let dummy_dst = td.path().join("dummy_dst"); + let mut ar = Builder::new(Vec::new()); + t!(symlink(dummy_src.path().display().to_string(), &dummy_dst)); + + assert!(dummy_dst.read_link().is_ok()); + assert!(dummy_dst.read_link().unwrap().is_dir()); + ar.append_dir_all("symlinks", td.path()).await.unwrap(); + ar.finish().await.unwrap(); +} diff --git a/tests/archives/directory.tar b/tests/archives/directory.tar new file mode 100644 index 0000000000000000000000000000000000000000..ec6867a4be3a84766e2b4d9dc8903abc66187359 GIT binary patch literal 10240 zcmeIxJq|)448ZZuo`M_DA|L0$r=t!&_58wOT!=2bnE3C~p&=!|w$;n@&=w^nX~{~` z_uNZ8#6pa!!BUB$ra2aIY5LDvgZp}`SLfvXI9^WcxZ|h#_~lPmsy|HMfIi@_=O5ah zSYH6*4)}*Zt?c_4_X+i__^pYYqPV{+Hbr$n-z}0R#|0009IL lKmY**5I_I{1Q0*~0R#|0009ILKmY**5I_I{1Q3`d@C27|H#z_S literal 0 HcmV?d00001 diff --git a/tests/archives/duplicate_dirs.tar b/tests/archives/duplicate_dirs.tar new file mode 100644 index 0000000000000000000000000000000000000000..fc19b9d01a7cd6a3134b649faceb634e605ec22c GIT binary patch literal 2048 zcmeHFZ3=)e2;EVyl_{~&1Z&vPo{`P46yG~M6|l@5qlGlPn3 z%)zLulS3{7O3k_^B1Qr%nb0WUbdv6$H@{jJq_wt_xs}N}FT*&>-El^tt?%E-$NQQi b4jBNC%BMr0_y6ntfBHDHxa>f7AgKd8zT`G5 literal 0 HcmV?d00001 diff --git a/tests/archives/empty_filename.tar b/tests/archives/empty_filename.tar new file mode 100644 index 0000000000000000000000000000000000000000..dd1c8a9fd5611c88405a307af267a15b302b8970 GIT binary patch literal 512 zcmZQzU|=XME=ep>Pyn)M0}Oz`+|-l-NEw=&8X(gkDG*>VG%_(UFfcMSH8)`}Ff=eT 
iF=tRPrHyk?O@(EtkZ4_aCRVo00IagfB*srAbeA&w`R literal 0 HcmV?d00001 diff --git a/tests/archives/pax.tar b/tests/archives/pax.tar new file mode 100644 index 0000000000000000000000000000000000000000..6de2d3bd35786d6cc4319404f6825c9baa874dbc GIT binary patch literal 10240 zcmeHLZExH*682~P3U-U8YusA<{^~mIrg7Wg;OoKh0S6d{!IiiYYl&1z%Im!#|NWk! zR_nDBr?`WA7X-UVEWXWf=Hblfp~Cjx`Q0yq$HLffFz8L%KXQ}k(CIuoc&1+mztLcD zKu`Q0KRexi_n_Mw4aU6zEj;L;(;FVJyJy<}lWI!qxPeKZT=eBm_B$+hG8ZS^!LZ*Q zjJl&iI7O@1ACD$MZ_4;~wm0lfyThu5Wx=Bxo`|a; zP&^mxgf&nP8^JB!2U08Qcfx+yZv?z_sWuiPmyJ2VVm}%gr4DPpBW!-4Khb6=jCB68 zaTTOG7X`GFn$)?%&f4uny0l!-iuSHZ8`~Zk+X##(w3SYq^?xE2>!{3yawu1o)wCUU z+ug$NS#OicNGR(qJ9~4{o&$$e9QDFZBe)Ukl{RtZDxuNxpe$u3s3}yWW2qAOXYk^z zKs!I3zdkz+^SBWNv9JK5s4ZzUf;l@!oH3afnYg!7FBxYS^K(X;8R-&+aTT+bkxrP9 zU8K^oOfC!u7_k7u+H$vF^$7yJj>Xd z_OF|5FstIA+Q)`$9>}k+HqZ!Ko=g#-X?)m(Gn*R-kz0u?R9}h-*{`VNi zKcnAnf5qloj~Dj*9LRoRdf25V|6^t0zAAT)lHr#MNaPSv&&3;HMG&qeg&HxW7SDBJkOGJF*gkl_qA2%Zxv5~Rm0c?3SBy~%8`c5r-YbRKSDf|oP4 zVWUEYD|sUe5lbFwleDSVzToEio8E+I9MDtAH@#^9adxIbwLol%%4yI#B3!X>90Ulo ziW`RG%0ZkW4v#bIdDc>wAdsmrVn!-9`>s~2aA|~C*w}9yC9;hZ3Gw{?_fnYk3Dmu? 
z;)Y1z$?yf{3hvUApCI$z>+@t{fB^Fca;(!<|47_+c?URh+g13?AQ|P~4S-;+y5hdp)fdke`v-mft2;lda;N#W;Kqt-06JZ!|!S5i754ER&Qqeunw%Bf$e9U!R*S%xk+32e} z-t3*G_h8c}M5|`%@u}L_6j~8(KfU?q*=f@t*p4z}ZJSzO<8y7Nf9PwB&){>Jqz>29 z6@{)5-y&FX6;0SGMY3`B`t1cJeHl5-WE^q3lp;f+;+EkKxD9L6F_|d3B2&IqX^I;o zE{Akit~B5R+Ct4MDGpHrZXpu0Li=iLtLtW+B?vlPWl=4?8Im@SusH^}@r4p8ZA#o> zbK!H`r}SpZXXHxwB64q^F3pZJw3}2=$jOyks!+b>Ea-9$MEt$GcIZ}e#27tR4Yq($B_Y4J>hP)wg9>Ukr{nm=d-C1+jD7!>?0wmU!7X-vi1?sS8ap#p))5E~ zMPIUa0mTXB4a55`5e{|d{MZjJ6{-euwuv`Twmf7=D=_ut#8zz#f4;0(%7Z2>ka5{2L#DtDyh@ literal 0 HcmV?d00001 diff --git a/tests/archives/pax2.tar b/tests/archives/pax2.tar new file mode 100644 index 0000000000000000000000000000000000000000..c0c8ed314ce8236a40a841950f0b7fbbfe8d7b44 GIT binary patch literal 10240 zcmeIyOA5j;5P;#VJw;ETGnuBP3l9@SP*4!9;PFkAV(X%AqPX~HGYv!Y_|j>NlUPkg zF%}t1il(kz&MDrr)TyRs8br*iIx(>}xa#J95Z6l|&uL3PSn3~_@Zzge?04<>NOx zA|KM+d<&E6*zb?qwFVQl#)~ec>%4hsHCN-b+AXa%3ylvxS2xq@_}zcJw^Dl=k>^}|y!F4O8+27Db;!AShW?L-?d<>NQ~i04*UxQkO=<`rfB*sr qAbYZ( literal 0 HcmV?d00001 diff --git a/tests/archives/reading_files.tar b/tests/archives/reading_files.tar new file mode 100644 index 0000000000000000000000000000000000000000..67e79ffdfe2d85519305dff62a70a9950058a1d1 GIT binary patch literal 10240 zcmeIxJqpAi5QgEYa|(|zX3flb45Zs?e_)Tl+NF>rg+hu~3&9V^;ep9AerZ`|Ny-s@ z(fwH?)!8J@4=Z6x9!u`#E+Ff9N8 literal 0 HcmV?d00001 diff --git a/tests/archives/spaces.tar b/tests/archives/spaces.tar new file mode 100644 index 0000000000000000000000000000000000000000..68e2a519eebd0d751f17106db3a5722cfc76f223 GIT binary patch literal 2048 zcmYdfpgk}&FfcbaS71;80RuD|ENG-)Xk=nyU|?uwW@Z8uF)=bVR$wroz0*d#4T)SM UJ_tqwU^E0qLtr!nMq~&801;dXMF0Q* literal 0 HcmV?d00001 diff --git a/tests/archives/sparse.tar b/tests/archives/sparse.tar new file mode 100644 index 0000000000000000000000000000000000000000..216aed1d780cf5ec5e1b2143e284bdbc362124aa GIT binary patch literal 10240 zcmeHM;cCMm5O#m}6n%nmMok`M4^X;Iwn1o1w5*T6yV$f1rh$MmTux|XPW^Ix=cn=F 
zj=Rp;?VJ7l-oN;>7a0m*skIh(k&OYmf(-Y475{{+8VQYRB(+xnl@goR48BB=C+BzF zW+RU8QPaq9a<+3LOQIX1;8J>KurBd;smrF;L%r{{{HXyv07av{b^bZBvSBR@xS)T# z|H!3&(a+=={X=c!RsRZ%0BLk>)}?>h+%%7?B{Gt%T=kz4G?{k{G|K?3&s$Vyt~Gkl z|JnbU{tck!!~U-;trYJ6s@8I4`X`l6?EmV49Z8$)Ps_*{=ZP#J3CP4#Ei}>#$9Pn* zkDiXE34j=qEd?>)?U~>I3W`^1S2_Nv87&Ks<6l<1QhUqsPt9mqcpU$-;+5K4j(=)K z%fbui|9oeEVuQ|-Gxras8s|DOFy7L-vm*+*-|`o7q##7kT#Q~ zGf$W|;l-^V_qBhj+m1V7sqk!V6Tys9EVjYYR*)n{r45g<@m&xJMJylJ4rFe0g_AN6KicgmV>_m8vSzrOC=F_s3qvsdUnj)LEGNc01w?JFS0ZEz9-!Lu2f-{+pq%m;N_&K_M3g#4Lh+iQB@^-{kTq z`v0(cc#+GZ{#%{yf6y1`|LPktv*5D+2XCVPQ;9!004m2zB5iayK{BcHyQ}^6%{?D| z-?uy#5I^{lKfn2xZdV_gY$hIbU6z0^*01+8gGv4WRR4_(_rHtW$u_S4>0aR*1V8`; qKmY_l00ck)1V8`;KmY_l00ck)1V8`;KmY_l00ck)1VG^L5_ku0yMiD9 literal 0 HcmV?d00001 diff --git a/tests/entry.rs b/tests/entry.rs new file mode 100644 index 00000000..f93df8ae --- /dev/null +++ b/tests/entry.rs @@ -0,0 +1,350 @@ +extern crate tokio_tar as async_tar; + +extern crate tempfile; + +use tokio::{fs::File, io::AsyncReadExt}; +use tokio_stream::*; + +use tempfile::Builder; + +macro_rules! 
t { + ($e:expr) => { + match $e { + Ok(v) => v, + Err(e) => panic!("{} returned {}", stringify!($e), e), + } + }; +} + +#[tokio::test] +async fn absolute_symlink() { + let mut ar = async_tar::Builder::new(Vec::new()); + + let mut header = async_tar::Header::new_gnu(); + header.set_size(0); + header.set_entry_type(async_tar::EntryType::Symlink); + t!(header.set_path("foo")); + t!(header.set_link_name("/bar")); + header.set_cksum(); + t!(ar.append(&header, &[][..]).await); + + let bytes = t!(ar.into_inner().await); + let mut ar = async_tar::Archive::new(&bytes[..]); + + let td = t!(Builder::new().prefix("tar").tempdir()); + t!(ar.unpack(td.path()).await); + + t!(td.path().join("foo").symlink_metadata()); + + let mut ar = async_tar::Archive::new(&bytes[..]); + let mut entries = t!(ar.entries()); + let entry = t!(entries.next().await.unwrap()); + assert_eq!(&*entry.link_name_bytes().unwrap(), b"/bar"); +} + +#[tokio::test] +async fn absolute_hardlink() { + let td = t!(Builder::new().prefix("tar").tempdir()); + let mut ar = async_tar::Builder::new(Vec::new()); + + let mut header = async_tar::Header::new_gnu(); + header.set_size(0); + header.set_entry_type(async_tar::EntryType::Regular); + t!(header.set_path("foo")); + header.set_cksum(); + t!(ar.append(&header, &[][..]).await); + + let mut header = async_tar::Header::new_gnu(); + header.set_size(0); + header.set_entry_type(async_tar::EntryType::Link); + t!(header.set_path("bar")); + // This absolute path under tempdir will be created at unpack time + t!(header.set_link_name(td.path().join("foo"))); + header.set_cksum(); + t!(ar.append(&header, &[][..]).await); + + let bytes = t!(ar.into_inner().await); + let mut ar = async_tar::Archive::new(&bytes[..]); + + t!(ar.unpack(td.path()).await); + t!(td.path().join("foo").metadata()); + t!(td.path().join("bar").metadata()); +} + +#[tokio::test] +async fn relative_hardlink() { + let mut ar = async_tar::Builder::new(Vec::new()); + + let mut header = async_tar::Header::new_gnu(); 
+ header.set_size(0); + header.set_entry_type(async_tar::EntryType::Regular); + t!(header.set_path("foo")); + header.set_cksum(); + t!(ar.append(&header, &[][..]).await); + + let mut header = async_tar::Header::new_gnu(); + header.set_size(0); + header.set_entry_type(async_tar::EntryType::Link); + t!(header.set_path("bar")); + t!(header.set_link_name("foo")); + header.set_cksum(); + t!(ar.append(&header, &[][..]).await); + + let bytes = t!(ar.into_inner().await); + let mut ar = async_tar::Archive::new(&bytes[..]); + + let td = t!(Builder::new().prefix("tar").tempdir()); + t!(ar.unpack(td.path()).await); + t!(td.path().join("foo").metadata()); + t!(td.path().join("bar").metadata()); +} + +#[tokio::test] +async fn absolute_link_deref_error() { + let mut ar = async_tar::Builder::new(Vec::new()); + + let mut header = async_tar::Header::new_gnu(); + header.set_size(0); + header.set_entry_type(async_tar::EntryType::Symlink); + t!(header.set_path("foo")); + t!(header.set_link_name("/")); + header.set_cksum(); + t!(ar.append(&header, &[][..]).await); + + let mut header = async_tar::Header::new_gnu(); + header.set_size(0); + header.set_entry_type(async_tar::EntryType::Regular); + t!(header.set_path("foo/bar")); + header.set_cksum(); + t!(ar.append(&header, &[][..]).await); + + let bytes = t!(ar.into_inner().await); + let mut ar = async_tar::Archive::new(&bytes[..]); + + let td = t!(Builder::new().prefix("tar").tempdir()); + assert!(ar.unpack(td.path()).await.is_err()); + t!(td.path().join("foo").symlink_metadata()); + assert!(File::open(td.path().join("foo").join("bar")).await.is_err()); +} + +#[tokio::test] +async fn relative_link_deref_error() { + let mut ar = async_tar::Builder::new(Vec::new()); + + let mut header = async_tar::Header::new_gnu(); + header.set_size(0); + header.set_entry_type(async_tar::EntryType::Symlink); + t!(header.set_path("foo")); + t!(header.set_link_name("../../../../")); + header.set_cksum(); + t!(ar.append(&header, &[][..]).await); + + let mut 
header = async_tar::Header::new_gnu(); + header.set_size(0); + header.set_entry_type(async_tar::EntryType::Regular); + t!(header.set_path("foo/bar")); + header.set_cksum(); + t!(ar.append(&header, &[][..]).await); + + let bytes = t!(ar.into_inner().await); + let mut ar = async_tar::Archive::new(&bytes[..]); + + let td = t!(Builder::new().prefix("tar").tempdir()); + assert!(ar.unpack(td.path()).await.is_err()); + t!(td.path().join("foo").symlink_metadata()); + assert!(File::open(td.path().join("foo").join("bar")).await.is_err()); +} + +#[tokio::test] +#[cfg(unix)] +async fn directory_maintains_permissions() { + use ::std::os::unix::fs::PermissionsExt; + + let mut ar = async_tar::Builder::new(Vec::new()); + + let mut header = async_tar::Header::new_gnu(); + header.set_size(0); + header.set_entry_type(async_tar::EntryType::Directory); + t!(header.set_path("foo")); + header.set_mode(0o777); + header.set_cksum(); + t!(ar.append(&header, &[][..]).await); + + let bytes = t!(ar.into_inner().await); + let mut ar = async_tar::Archive::new(&bytes[..]); + + let td = t!(Builder::new().prefix("tar").tempdir()); + t!(ar.unpack(td.path()).await); + let f = t!(File::open(td.path().join("foo")).await); + let md = t!(f.metadata().await); + assert!(md.is_dir()); + assert_eq!(md.permissions().mode(), 0o40777); +} + +#[tokio::test] +#[cfg(not(windows))] // dangling symlinks have weird permissions +async fn modify_link_just_created() { + let mut ar = async_tar::Builder::new(Vec::new()); + + let mut header = async_tar::Header::new_gnu(); + header.set_size(0); + header.set_entry_type(async_tar::EntryType::Symlink); + t!(header.set_path("foo")); + t!(header.set_link_name("bar")); + header.set_cksum(); + t!(ar.append(&header, &[][..]).await); + + let mut header = async_tar::Header::new_gnu(); + header.set_size(0); + header.set_entry_type(async_tar::EntryType::Regular); + t!(header.set_path("bar/foo")); + header.set_cksum(); + t!(ar.append(&header, &[][..]).await); + + let mut header = 
async_tar::Header::new_gnu(); + header.set_size(0); + header.set_entry_type(async_tar::EntryType::Regular); + t!(header.set_path("foo/bar")); + header.set_cksum(); + t!(ar.append(&header, &[][..]).await); + + let bytes = t!(ar.into_inner().await); + let mut ar = async_tar::Archive::new(&bytes[..]); + + let td = t!(Builder::new().prefix("tar").tempdir()); + t!(ar.unpack(td.path()).await); + + t!(File::open(td.path().join("bar/foo")).await); + t!(File::open(td.path().join("bar/bar")).await); + t!(File::open(td.path().join("foo/foo")).await); + t!(File::open(td.path().join("foo/bar")).await); +} + +#[tokio::test] +async fn parent_paths_error() { + let mut ar = async_tar::Builder::new(Vec::new()); + + let mut header = async_tar::Header::new_gnu(); + header.set_size(0); + header.set_entry_type(async_tar::EntryType::Symlink); + t!(header.set_path("foo")); + t!(header.set_link_name("..")); + header.set_cksum(); + t!(ar.append(&header, &[][..]).await); + + let mut header = async_tar::Header::new_gnu(); + header.set_size(0); + header.set_entry_type(async_tar::EntryType::Regular); + t!(header.set_path("foo/bar")); + header.set_cksum(); + t!(ar.append(&header, &[][..]).await); + + let bytes = t!(ar.into_inner().await); + let mut ar = async_tar::Archive::new(&bytes[..]); + + let td = t!(Builder::new().prefix("tar").tempdir()); + assert!(ar.unpack(td.path()).await.is_err()); + t!(td.path().join("foo").symlink_metadata()); + assert!(File::open(td.path().join("foo").join("bar")).await.is_err()); +} + +#[tokio::test] +#[cfg(unix)] +async fn good_parent_paths_ok() { + use std::path::PathBuf; + let mut ar = async_tar::Builder::new(Vec::new()); + + let mut header = async_tar::Header::new_gnu(); + header.set_size(0); + header.set_entry_type(async_tar::EntryType::Symlink); + t!(header.set_path(PathBuf::from("foo").join("bar"))); + t!(header.set_link_name(PathBuf::from("..").join("bar"))); + header.set_cksum(); + t!(ar.append(&header, &[][..]).await); + + let mut header = 
async_tar::Header::new_gnu(); + header.set_size(0); + header.set_entry_type(async_tar::EntryType::Regular); + t!(header.set_path("bar")); + header.set_cksum(); + t!(ar.append(&header, &[][..]).await); + + let bytes = t!(ar.into_inner().await); + let mut ar = async_tar::Archive::new(&bytes[..]); + + let td = t!(Builder::new().prefix("tar").tempdir()); + t!(ar.unpack(td.path()).await); + t!(td.path().join("foo").join("bar").read_link()); + let dst = t!(td.path().join("foo").join("bar").canonicalize()); + t!(File::open(dst).await); +} + +#[tokio::test] +async fn modify_hard_link_just_created() { + let mut ar = async_tar::Builder::new(Vec::new()); + + let mut header = async_tar::Header::new_gnu(); + header.set_size(0); + header.set_entry_type(async_tar::EntryType::Link); + t!(header.set_path("foo")); + t!(header.set_link_name("../test")); + header.set_cksum(); + t!(ar.append(&header, &[][..]).await); + + let mut header = async_tar::Header::new_gnu(); + header.set_size(1); + header.set_entry_type(async_tar::EntryType::Regular); + t!(header.set_path("foo")); + header.set_cksum(); + t!(ar.append(&header, &b"x"[..]).await); + + let bytes = t!(ar.into_inner().await); + let mut ar = async_tar::Archive::new(&bytes[..]); + + let td = t!(Builder::new().prefix("tar").tempdir()); + + let test = td.path().join("test"); + t!(File::create(&test).await); + + let dir = td.path().join("dir"); + assert!(ar.unpack(&dir).await.is_err()); + + let mut contents = Vec::new(); + t!(t!(File::open(&test).await).read_to_end(&mut contents).await); + assert_eq!(contents.len(), 0); +} + +#[tokio::test] +async fn modify_symlink_just_created() { + let mut ar = async_tar::Builder::new(Vec::new()); + + let mut header = async_tar::Header::new_gnu(); + header.set_size(0); + header.set_entry_type(async_tar::EntryType::Symlink); + t!(header.set_path("foo")); + t!(header.set_link_name("../test")); + header.set_cksum(); + t!(ar.append(&header, &[][..]).await); + + let mut header = 
async_tar::Header::new_gnu(); + header.set_size(1); + header.set_entry_type(async_tar::EntryType::Regular); + t!(header.set_path("foo")); + header.set_cksum(); + t!(ar.append(&header, &b"x"[..]).await); + + let bytes = t!(ar.into_inner().await); + let mut ar = async_tar::Archive::new(&bytes[..]); + + let td = t!(Builder::new().prefix("tar").tempdir()); + + let test = td.path().join("test"); + t!(File::create(&test).await); + + let dir = td.path().join("dir"); + t!(ar.unpack(&dir).await); + + let mut contents = Vec::new(); + t!(t!(File::open(&test).await).read_to_end(&mut contents).await); + assert_eq!(contents.len(), 0); +} diff --git a/tests/header/mod.rs b/tests/header/mod.rs new file mode 100644 index 00000000..33f479d4 --- /dev/null +++ b/tests/header/mod.rs @@ -0,0 +1,243 @@ +#![allow(clippy::cognitive_complexity)] + +use std::{ + fs::{self, File}, + io::Write, + iter, mem, + path::Path, + thread, time, +}; + +use tempfile::Builder; + +use async_tar::{GnuHeader, Header, HeaderMode}; + +#[test] +fn default_gnu() { + let mut h = Header::new_gnu(); + assert!(h.as_gnu().is_some()); + assert!(h.as_gnu_mut().is_some()); + assert!(h.as_ustar().is_none()); + assert!(h.as_ustar_mut().is_none()); +} + +#[test] +fn goto_old() { + let mut h = Header::new_old(); + assert!(h.as_gnu().is_none()); + assert!(h.as_gnu_mut().is_none()); + assert!(h.as_ustar().is_none()); + assert!(h.as_ustar_mut().is_none()); +} + +#[test] +fn goto_ustar() { + let mut h = Header::new_ustar(); + assert!(h.as_gnu().is_none()); + assert!(h.as_gnu_mut().is_none()); + assert!(h.as_ustar().is_some()); + assert!(h.as_ustar_mut().is_some()); +} + +#[test] +fn link_name() { + let mut h = Header::new_gnu(); + t!(h.set_link_name("foo")); + assert_eq!(t!(h.link_name()).unwrap().to_str(), Some("foo")); + t!(h.set_link_name("../foo")); + assert_eq!(t!(h.link_name()).unwrap().to_str(), Some("../foo")); + t!(h.set_link_name("foo/bar")); + assert_eq!(t!(h.link_name()).unwrap().to_str(), Some("foo/bar")); + 
t!(h.set_link_name("foo\\ba")); + if cfg!(windows) { + assert_eq!(t!(h.link_name()).unwrap().to_str(), Some("foo/ba")); + } else { + assert_eq!(t!(h.link_name()).unwrap().to_str(), Some("foo\\ba")); + } + + let name = "foo\\bar\0"; + for (slot, val) in h.as_old_mut().linkname.iter_mut().zip(name.as_bytes()) { + *slot = *val; + } + assert_eq!(t!(h.link_name()).unwrap().to_str(), Some("foo\\bar")); + + assert!(h.set_link_name("\0").is_err()); +} + +#[test] +fn mtime() { + let h = Header::new_gnu(); + assert_eq!(t!(h.mtime()), 0); + + let h = Header::new_ustar(); + assert_eq!(t!(h.mtime()), 0); + + let h = Header::new_old(); + assert_eq!(t!(h.mtime()), 0); +} + +#[test] +fn user_and_group_name() { + let mut h = Header::new_gnu(); + t!(h.set_username("foo")); + t!(h.set_groupname("bar")); + assert_eq!(t!(h.username()), Some("foo")); + assert_eq!(t!(h.groupname()), Some("bar")); + + h = Header::new_ustar(); + t!(h.set_username("foo")); + t!(h.set_groupname("bar")); + assert_eq!(t!(h.username()), Some("foo")); + assert_eq!(t!(h.groupname()), Some("bar")); + + h = Header::new_old(); + assert_eq!(t!(h.username()), None); + assert_eq!(t!(h.groupname()), None); + assert!(h.set_username("foo").is_err()); + assert!(h.set_groupname("foo").is_err()); +} + +#[test] +fn dev_major_minor() { + let mut h = Header::new_gnu(); + t!(h.set_device_major(1)); + t!(h.set_device_minor(2)); + assert_eq!(t!(h.device_major()), Some(1)); + assert_eq!(t!(h.device_minor()), Some(2)); + + h = Header::new_ustar(); + t!(h.set_device_major(1)); + t!(h.set_device_minor(2)); + assert_eq!(t!(h.device_major()), Some(1)); + assert_eq!(t!(h.device_minor()), Some(2)); + + h.as_ustar_mut().unwrap().dev_minor[0] = 0x7f; + h.as_ustar_mut().unwrap().dev_major[0] = 0x7f; + assert!(h.device_major().is_err()); + assert!(h.device_minor().is_err()); + + h.as_ustar_mut().unwrap().dev_minor[0] = b'g'; + h.as_ustar_mut().unwrap().dev_major[0] = b'h'; + assert!(h.device_major().is_err()); + 
assert!(h.device_minor().is_err()); + + h = Header::new_old(); + assert_eq!(t!(h.device_major()), None); + assert_eq!(t!(h.device_minor()), None); + assert!(h.set_device_major(1).is_err()); + assert!(h.set_device_minor(1).is_err()); +} + +#[test] +fn set_path() { + let mut h = Header::new_gnu(); + t!(h.set_path("foo")); + assert_eq!(t!(h.path()).to_str(), Some("foo")); + t!(h.set_path("foo/")); + assert_eq!(t!(h.path()).to_str(), Some("foo/")); + t!(h.set_path("foo/bar")); + assert_eq!(t!(h.path()).to_str(), Some("foo/bar")); + t!(h.set_path("foo\\bar")); + if cfg!(windows) { + assert_eq!(t!(h.path()).to_str(), Some("foo/bar")); + } else { + assert_eq!(t!(h.path()).to_str(), Some("foo\\bar")); + } + + let long_name = iter::repeat("foo").take(100).collect::(); + let medium1 = iter::repeat("foo").take(52).collect::(); + let medium2 = iter::repeat("fo/").take(52).collect::(); + + assert!(h.set_path(&long_name).is_err()); + assert!(h.set_path(&medium1).is_err()); + assert!(h.set_path(&medium2).is_err()); + assert!(h.set_path("\0").is_err()); + + h = Header::new_ustar(); + t!(h.set_path("foo")); + assert_eq!(t!(h.path()).to_str(), Some("foo")); + + assert!(h.set_path(&long_name).is_err()); + assert!(h.set_path(&medium1).is_err()); + t!(h.set_path(&medium2)); + assert_eq!(t!(h.path()).to_str(), Some(&medium2[..])); +} + +#[test] +fn set_ustar_path_hard() { + let mut h = Header::new_ustar(); + let p = Path::new("a").join(&vec!["a"; 100].join("")); + t!(h.set_path(&p)); + let path = t!(h.path()); + let actual: &Path = path.as_ref(); + assert_eq!(actual, p); +} + +#[test] +fn set_metadata_deterministic() { + let td = t!(Builder::new().prefix("async-tar").tempdir()); + let tmppath = td.path().join("tmpfile"); + + fn mk_header(path: &Path, readonly: bool) -> Header { + let mut file = t!(File::create(path)); + t!(file.write_all(b"c")); + let mut perms = t!(file.metadata()).permissions(); + perms.set_readonly(readonly); + t!(fs::set_permissions(path, perms)); + let mut h = 
Header::new_ustar(); + h.set_metadata_in_mode(&t!(path.metadata()), HeaderMode::Deterministic); + h + } + + // Create "the same" File twice in a row, one second apart, with differing readonly values. + let one = mk_header(tmppath.as_path(), false); + thread::sleep(time::Duration::from_millis(1050)); + let two = mk_header(tmppath.as_path(), true); + + // Always expected to match. + assert_eq!(t!(one.size()), t!(two.size())); + assert_eq!(t!(one.path()), t!(two.path())); + assert_eq!(t!(one.mode()), t!(two.mode())); + + // Would not match without `Deterministic`. + assert_eq!(t!(one.mtime()), t!(two.mtime())); + // TODO: No great way to validate that these would not be filled, but + // check them anyway. + assert_eq!(t!(one.uid()), t!(two.uid())); + assert_eq!(t!(one.gid()), t!(two.gid())); +} + +#[test] +fn extended_numeric_format() { + let mut h: GnuHeader = unsafe { mem::zeroed() }; + h.as_header_mut().set_size(42); + assert_eq!(h.size, [48, 48, 48, 48, 48, 48, 48, 48, 48, 53, 50, 0]); + h.as_header_mut().set_size(8_589_934_593); + assert_eq!(h.size, [0x80, 0, 0, 0, 0, 0, 0, 0x02, 0, 0, 0, 1]); + h.size = [0x80, 0, 0, 0, 0, 0, 0, 0x02, 0, 0, 0, 0]; + assert_eq!(h.as_header().entry_size().unwrap(), 0x0002_0000_0000); + h.size = [48, 48, 48, 48, 48, 48, 48, 48, 48, 53, 51, 0]; + assert_eq!(h.as_header().entry_size().unwrap(), 43); + + h.as_header_mut().set_gid(42); + assert_eq!(h.gid, [48, 48, 48, 48, 48, 53, 50, 0]); + assert_eq!(h.as_header().gid().unwrap(), 42); + h.as_header_mut().set_gid(0x7fff_ffff_ffff_ffff); + assert_eq!(h.gid, [0xff; 8]); + assert_eq!(h.as_header().gid().unwrap(), 0x7fff_ffff_ffff_ffff); + h.uid = [0x80, 0x00, 0x00, 0x00, 0x12, 0x34, 0x56, 0x78]; + assert_eq!(h.as_header().uid().unwrap(), 0x1234_5678); + + h.mtime = [ + 0x80, 0, 0, 0, 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, + ]; + assert_eq!(h.as_header().mtime().unwrap(), 0x0123_4567_89ab_cdef); +} + +#[test] +fn byte_slice_conversion() { + let h = Header::new_gnu(); + let b: &[u8] 
= h.as_bytes(); + let b_conv: &[u8] = Header::from_byte_slice(h.as_bytes()).as_bytes(); + assert_eq!(b, b_conv); +}