From 37fdd56d583c6c6200ba4f5561c0ffc1300d0675 Mon Sep 17 00:00:00 2001 From: simleo Date: Fri, 16 Sep 2022 15:58:53 +0200 Subject: [PATCH 1/3] raise an error when writing a file whose source does not exist --- rocrate/model/file.py | 7 +++++-- test/test_read.py | 16 ++++++++++++++++ test/test_write.py | 13 +++++++++++-- 3 files changed, 32 insertions(+), 4 deletions(-) diff --git a/rocrate/model/file.py b/rocrate/model/file.py index 263677d..6ea48db 100644 --- a/rocrate/model/file.py +++ b/rocrate/model/file.py @@ -18,10 +18,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -import os from pathlib import Path import shutil import urllib.request +import warnings from http.client import HTTPResponse from io import BytesIO, StringIO @@ -59,7 +59,10 @@ def write(self, base_path): if self.fetch_remote: out_file_path.parent.mkdir(parents=True, exist_ok=True) urllib.request.urlretrieve(response.url, out_file_path) - elif os.path.isfile(self.source): + elif self.source is None: + # Allows to record a File entity whose @id does not exist, see #73 + warnings.warn(f"No source for {self.id}") + else: out_file_path.parent.mkdir(parents=True, exist_ok=True) if not out_file_path.exists() or not out_file_path.samefile(self.source): shutil.copy(self.source, out_file_path) diff --git a/test/test_read.py b/test/test_read.py index edb1d9f..f5ea5a2 100644 --- a/test/test_read.py +++ b/test/test_read.py @@ -324,6 +324,7 @@ def test_missing_dir(test_data_dir, tmpdir): assert not (out_path / 'README.txt').exists() +@pytest.mark.filterwarnings("ignore") def test_missing_file(test_data_dir, tmpdir): crate_dir = test_data_dir / 'read_crate' name = 'test_file_galaxy.txt' @@ -335,9 +336,24 @@ def test_missing_file(test_data_dir, tmpdir): assert test_file.id == name out_path = tmpdir / 'crate_read_out' + with pytest.raises(OSError): + crate.write(out_path) + + # Two options to get a writable crate + + # 1. Force write the crate as it is + test_file.source = None crate.write(out_path) assert not (out_path / name).exists() + # 2. Provide an existing source + source = tmpdir / "source.txt" + text = "foo\nbar\n" + source.write_text(text) + test_file.source = source + crate.write(out_path) + assert (out_path / name).read_text() == text + def test_generic_data_entity(tmpdir): rc_id = "#collection" diff --git a/test/test_write.py b/test/test_write.py index b806ae5..4f8dbed 100644 --- a/test/test_write.py +++ b/test/test_write.py @@ -301,6 +301,7 @@ def test_remote_uri_exceptions(tmpdir): # no error on Windows, or on Linux as root, so we don't use pytest.raises +@pytest.mark.filterwarnings("ignore") @pytest.mark.parametrize("fetch_remote,validate_url", [(False, False), (False, True), (True, False), (True, True)]) def test_missing_source(test_data_dir, tmpdir, fetch_remote, validate_url): path = test_data_dir / uuid.uuid4().hex @@ -310,13 +311,21 @@ def test_missing_source(test_data_dir, tmpdir, fetch_remote, validate_url): file_ = crate.add_file(path, **args) assert file_ is crate.dereference(path.name) out_path = tmpdir / 'ro_crate_out_1' - crate.write(out_path) - assert not (out_path / path.name).exists() + with pytest.raises(OSError): + crate.write(out_path) crate = ROCrate() file_ = crate.add_file(path, path.name, **args) assert file_ is crate.dereference(path.name) out_path = tmpdir / 'ro_crate_out_2' + with pytest.raises(OSError): + crate.write(out_path) + + crate = ROCrate() + file_ = crate.add_file(None, path.name, **args) + assert file_ is crate.dereference(path.name) + out_path = tmpdir / 'ro_crate_out_3' + crate.write(out_path) assert not (out_path / path.name).exists() From 685d187be0e25f24bcc7b13df488e7d1b802dcfe Mon Sep 17 00:00:00 2001 From: simleo Date: Fri, 16 Sep 2022 16:54:11 +0200 Subject: [PATCH 2/3] simplified test_missing_source --- test/test_write.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/test/test_write.py b/test/test_write.py index 4f8dbed..9c903ad 100644 --- a/test/test_write.py +++ b/test/test_write.py @@ -302,27 +302,25 @@ def test_remote_uri_exceptions(tmpdir): @pytest.mark.filterwarnings("ignore") -@pytest.mark.parametrize("fetch_remote,validate_url", [(False, False), (False, True), (True, False), (True, True)]) -def test_missing_source(test_data_dir, tmpdir, fetch_remote, validate_url): +def test_missing_source(test_data_dir, tmpdir): path = test_data_dir / uuid.uuid4().hex - args = {"fetch_remote": fetch_remote, "validate_url": validate_url} crate = ROCrate() - file_ = crate.add_file(path, **args) + file_ = crate.add_file(path) assert file_ is crate.dereference(path.name) out_path = tmpdir / 'ro_crate_out_1' with pytest.raises(OSError): crate.write(out_path) crate = ROCrate() - file_ = crate.add_file(path, path.name, **args) + file_ = crate.add_file(path, path.name) assert file_ is crate.dereference(path.name) out_path = tmpdir / 'ro_crate_out_2' with pytest.raises(OSError): crate.write(out_path) crate = ROCrate() - file_ = crate.add_file(None, path.name, **args) + file_ = crate.add_file(None, path.name) assert file_ is crate.dereference(path.name) out_path = tmpdir / 'ro_crate_out_3' crate.write(out_path) From 15f5d801dd314525b2ab9e5d86d86a8a7b85dfa6 Mon Sep 17 00:00:00 2001 From: simleo Date: Mon, 19 Sep 2022 12:23:03 +0200 Subject: [PATCH 3/3] raise an error when writing a dir whose source does not exist --- rocrate/model/dataset.py | 15 +++++++++++--- test/test_read.py | 22 ++++++++++++++++++-- test/test_write.py | 45 ++++++++++++++++++++++++---------------- 3 files changed, 59 insertions(+), 23 deletions(-) diff --git a/rocrate/model/dataset.py b/rocrate/model/dataset.py index f883a02..b378ea3 100644 --- a/rocrate/model/dataset.py +++ b/rocrate/model/dataset.py @@ -18,6 +18,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +import errno +import os import shutil from pathlib import Path from urllib.request import urlopen @@ -48,9 +50,16 @@ def write(self, base_path): if self.fetch_remote: self.__get_parts(out_path) else: - out_path.mkdir(parents=True, exist_ok=True) - if not self.crate.source and self.source and Path(self.source).exists(): - self.crate._copy_unlisted(self.source, out_path) + if self.source is None: + out_path.mkdir(parents=True, exist_ok=True) + else: + if not Path(self.source).exists(): + raise FileNotFoundError( + errno.ENOENT, os.strerror(errno.ENOENT), str(self.source) + ) + out_path.mkdir(parents=True, exist_ok=True) + if not self.crate.source: + self.crate._copy_unlisted(self.source, out_path) def __get_parts(self, out_path): out_path.mkdir(parents=True, exist_ok=True) diff --git a/test/test_read.py b/test/test_read.py index f5ea5a2..df50801 100644 --- a/test/test_read.py +++ b/test/test_read.py @@ -320,8 +320,24 @@ def test_missing_dir(test_data_dir, tmpdir): assert examples_dataset.id == f'{name}/' out_path = tmpdir / 'crate_read_out' + with pytest.raises(OSError): + crate.write(out_path) + + # Two options to get a writable crate + + # 1. Set the source to None (will create an empty dir) + examples_dataset.source = None crate.write(out_path) - assert not (out_path / 'README.txt').exists() + assert (out_path / name).is_dir() + + shutil.rmtree(out_path) + + # 2. Provide an existing source + source = tmpdir / "source" + source.mkdir() + examples_dataset.source = source + crate.write(out_path) + assert (out_path / name).is_dir() @pytest.mark.filterwarnings("ignore") @@ -341,11 +357,13 @@ def test_missing_file(test_data_dir, tmpdir): # Two options to get a writable crate - # 1. Force write the crate as it is + # 1. Set the source to None (file will still be missing in the copy) test_file.source = None crate.write(out_path) assert not (out_path / name).exists() + shutil.rmtree(out_path) + # 2. Provide an existing source source = tmpdir / "source.txt" text = "foo\nbar\n" diff --git a/test/test_write.py b/test/test_write.py index 9c903ad..15a5244 100644 --- a/test/test_write.py +++ b/test/test_write.py @@ -302,29 +302,35 @@ def test_remote_uri_exceptions(tmpdir): @pytest.mark.filterwarnings("ignore") -def test_missing_source(test_data_dir, tmpdir): +@pytest.mark.parametrize("what", ["file", "dataset"]) +def test_missing_source(test_data_dir, tmpdir, what): path = test_data_dir / uuid.uuid4().hex crate = ROCrate() - file_ = crate.add_file(path) - assert file_ is crate.dereference(path.name) - out_path = tmpdir / 'ro_crate_out_1' + entity = getattr(crate, f"add_{what}")(path) + assert entity is crate.dereference(path.name) + crate_dir = tmpdir / 'ro_crate_out_1' with pytest.raises(OSError): - crate.write(out_path) + crate.write(crate_dir) crate = ROCrate() - file_ = crate.add_file(path, path.name) - assert file_ is crate.dereference(path.name) - out_path = tmpdir / 'ro_crate_out_2' + entity = getattr(crate, f"add_{what}")(path, path.name) + assert entity is crate.dereference(path.name) + crate_dir = tmpdir / 'ro_crate_out_2' with pytest.raises(OSError): - crate.write(out_path) + crate.write(crate_dir) crate = ROCrate() - file_ = crate.add_file(None, path.name) - assert file_ is crate.dereference(path.name) - out_path = tmpdir / 'ro_crate_out_3' - crate.write(out_path) - assert not (out_path / path.name).exists() + entity = getattr(crate, f"add_{what}")(None, path.name) + assert entity is crate.dereference(path.name) + crate_dir = tmpdir / 'ro_crate_out_3' + crate.write(crate_dir) + out_path = crate_dir / path.name + if what == "file": + assert not out_path.exists() + else: + assert out_path.is_dir() + assert not any(out_path.iterdir()) @pytest.mark.parametrize("fetch_remote,validate_url", [(False, False), (False, True), (True, False), (True, True)]) @@ -343,12 +349,15 @@ def test_no_source_no_dest(test_data_dir, fetch_remote, validate_url): def test_dataset(test_data_dir, tmpdir): crate = ROCrate() - path = test_data_dir / "a" / "b" - d1 = crate.add_dataset(path) + path_a_b = test_data_dir / "a" / "b" + path_c = test_data_dir / "c" + for p in path_a_b, path_c: + p.mkdir(parents=True) + d1 = crate.add_dataset(path_a_b) assert crate.dereference("b") is d1 - d2 = crate.add_dataset(path, "a/b") + d2 = crate.add_dataset(path_a_b, "a/b") assert crate.dereference("a/b") is d2 - d_from_str = crate.add_dataset(str(test_data_dir / "c")) + d_from_str = crate.add_dataset(str(path_c)) assert crate.dereference("c") is d_from_str out_path = tmpdir / 'ro_crate_out'