diff --git a/src/encoded/schemas/changelogs/file.md b/src/encoded/schemas/changelogs/file.md index f8225747627..ec794db8712 100644 --- a/src/encoded/schemas/changelogs/file.md +++ b/src/encoded/schemas/changelogs/file.md @@ -2,10 +2,12 @@ ### Schema version 25 * *output_type* *smoothed methylation stage at CpG* was updated to *smoothed methylation state at CpG* +* *file_format* was updated to include *h5ad*. ### Minor changes since schema version 24 * The *output_type* enum was updated to include *UV enriched segment quantifications*, *plus strand methylation state at CpG*, *minus strand methylation state at CpG*, *CpG sites coverage*, and *sparse gene count matrix*. + ### Schema version 24 * *output_type: stable peaks* was updated to *output_type: pseudo-replicated peaks* diff --git a/src/encoded/schemas/file.json b/src/encoded/schemas/file.json index 50015cb2dde..8e53ab70d0b 100644 --- a/src/encoded/schemas/file.json +++ b/src/encoded/schemas/file.json @@ -839,6 +839,7 @@ "gff", "gtf", "idx", + "h5ad", "hdf5", "hic", "PWM", @@ -1052,6 +1053,7 @@ "fastq", "gff", "gtf", + "h5ad", "hdf5", "hic", "idat", @@ -2112,6 +2114,7 @@ "gtf": ".gtf.gz", "idx": ".idx", "hic": ".hic", + "h5ad": ".h5ad", "hdf5": ".h5", "idat": ".idat", "PWM":".pwm", diff --git a/src/encoded/tests/data/inserts/expected_batch_download.tsv b/src/encoded/tests/data/inserts/expected_batch_download.tsv index 1a79375e750..7fa24cee2c4 100644 --- a/src/encoded/tests/data/inserts/expected_batch_download.tsv +++ b/src/encoded/tests/data/inserts/expected_batch_download.tsv @@ -112,4 +112,4 @@ http://localhost/files/ENCFF129DNA/@@download/ENCFF129DNA.bigWig http://localhost/files/ENCFF127ACM/@@download/ENCFF127ACM.fastq.gz http://localhost/files/ENCFF718LJW/@@download/ENCFF718LJW.bam http://localhost/files/ENCFF081AVF/@@download/ENCFF081AVF.bed.gz -http://localhost/files/ENCFF001SCR/@@download/ENCFF001SCR.bigWig +http://localhost/files/ENCFF001SCR/@@download/ENCFF001SCR.h5ad diff --git a/src/encoded/tests/data/inserts/expected_metadata.tsv b/src/encoded/tests/data/inserts/expected_metadata.tsv index 7f77f1862e2..bdbeaeb0ec9 100644 --- a/src/encoded/tests/data/inserts/expected_metadata.tsv +++ b/src/encoded/tests/data/inserts/expected_metadata.tsv @@ -112,4 +112,4 @@ ENCFF129DNA bigWig bigWig signal of all reads hg19 ENCSR751YPU DNase-seq UBERON ENCFF127ACM fastq fastq reads ENCSR604DNT DNase-seq UBERON:0001891 midbrain tissue Homo sapiens DNA 2020-01-04 Roadmap 1 1_1 50 single-ended 13689 John Stamatoyannopoulos, UW d49c217e369df785177805b864542e88 http://localhost/files/ENCFF127ACM/@@download/ENCFF127ACM.fastq.gz Illumina HiSeq 2000 released missing documents ENCFF718LJW bam bam alignments GRCh38 ENCSR604DNT DNase-seq UBERON:0001891 midbrain tissue Homo sapiens DNA 2020-01-04 Roadmap 1 1_1 /files/ENCFF127ACM/ 145214 John Stamatoyannopoulos, UW eabe445821ad629815edc9875994f256 http://localhost/files/ENCFF718LJW/@@download/ENCFF718LJW.bam released missing analysis_step_run missing documents ENCFF081AVF bed narrowPeak bed narrowPeak footprints GRCh38 ENCSR604DNT DNase-seq UBERON:0001891 midbrain tissue Homo sapiens DNA 2020-01-04 Roadmap 1 1_1 /files/ENCFF718LJW/ 18390 John Stamatoyannopoulos, UW d53c4aee0153037136cab8c35d2bb6ca http://localhost/files/ENCFF081AVF/@@download/ENCFF081AVF.bed.gz released missing analysis_step_run missing documents -ENCFF001SCR bigWig bigWig sparse gene count matrix GRCh38 ENCSR000SCR scRNA-seq EFO:0002067 K562 cell line ENCODE 370148294 Barbara Wold, Caltech 128ecf542a35ac5270a87dc740918404 http://localhost/files/ENCFF001SCR/@@download/ENCFF001SCR.bigWig V19 in progress missing analysis_step_run missing documents +ENCFF001SCR h5ad h5ad sparse gene count matrix GRCh38 ENCSR000SCR scRNA-seq EFO:0002067 K562 cell line ENCODE 370148294 Barbara Wold, Caltech 128ecf542a35ac5270a87dc740918404 http://localhost/files/ENCFF001SCR/@@download/ENCFF001SCR.h5ad V19 in progress missing analysis_step_run missing documents diff --git a/src/encoded/tests/data/inserts/file.json b/src/encoded/tests/data/inserts/file.json index 3c6319df4e9..f8677bf5817 100644 --- a/src/encoded/tests/data/inserts/file.json +++ b/src/encoded/tests/data/inserts/file.json @@ -4096,7 +4096,7 @@ "submitted_by": "facilisi.tristique@potenti.vivamus", "md5sum": "128ecf542a35ac5270a87dc740918404", "accession": "ENCFF001SCR", - "file_format": "bigWig", + "file_format": "h5ad", "award": "/awards/U54HG006998/", "assembly": "GRCh38", "genome_annotation": "V19", @@ -4106,8 +4106,7 @@ "file_size": 370148294, "output_type": "sparse gene count matrix", "lab": "/labs/barbara-wold/", - "aliases": [], - "notes": "File format not yet supported for this output_type (expecting h5ad)" + "aliases": ["encode:hdf5-sparse-count-matrix"] }, { "status": "released",