Skip to content

Commit

Permalink
epam#54 Fixed offsets for Column format detection
Browse files Browse the repository at this point in the history
It looks like `SasFileParser.FormatAndLabelSubheader.processSubheader` and `SasFileConstants`
should use different offsets to calculate column format information correctly for 64-bit (x64) files.

Before the fix:
```java
long COLUMN_FORMAT_WIDTH_OFFSET = 8L;
long COLUMN_FORMAT_PRECISION_OFFSET = 10L;

subheaderOffset + COLUMN_FORMAT_WIDTH_OFFSET + intOrLongLength,
subheaderOffset + COLUMN_FORMAT_PRECISION_OFFSET + intOrLongLength,
```

For 32-bit:
COLUMN_FORMAT_WIDTH_OFFSET + intOrLongLength = 8 + 4 = 12 (correct)
COLUMN_FORMAT_PRECISION_OFFSET + intOrLongLength = 10 + 4 = 14 (correct)

For 64-bit:
COLUMN_FORMAT_WIDTH_OFFSET + intOrLongLength = 8 + 8 = 16 (not correct)
COLUMN_FORMAT_PRECISION_OFFSET + intOrLongLength = 10 + 8 = 18 (not correct)

After the fix:
```java
long COLUMN_FORMAT_WIDTH_OFFSET = 0L;
long COLUMN_FORMAT_PRECISION_OFFSET = 2L;

subheaderOffset + COLUMN_FORMAT_WIDTH_OFFSET + 3 * intOrLongLength,
subheaderOffset + COLUMN_FORMAT_PRECISION_OFFSET + 3 * intOrLongLength,
```

For 32-bit:
COLUMN_FORMAT_WIDTH_OFFSET + 3 * intOrLongLength = 0 + 3 * 4 = 12 (correct and the same as above)
COLUMN_FORMAT_PRECISION_OFFSET + 3 * intOrLongLength = 2 + 3 * 4 = 14 (correct and the same as above)

For 64-bit:
COLUMN_FORMAT_WIDTH_OFFSET + 3 * intOrLongLength = 0 + 3 * 8 = 24 (correct)
COLUMN_FORMAT_PRECISION_OFFSET + 3 * intOrLongLength = 2 + 3 * 8 = 26 (correct)

So, this new calculation gives exactly the same offsets for 32-bit files and also fixes it for 64-bit files.
  • Loading branch information
xantorohara committed Dec 3, 2020
1 parent fc62a76 commit be12344
Show file tree
Hide file tree
Showing 5 changed files with 83 additions and 10 deletions.
4 changes: 2 additions & 2 deletions src/main/java/com/epam/parso/impl/SasFileConstants.java
Original file line number Diff line number Diff line change
Expand Up @@ -809,7 +809,7 @@ public interface SasFileConstants {
* (int or long depending on {@link SasFileConstants#ALIGN_2_VALUE}) from
* the beginning of the {@link SasFileParser.FormatAndLabelSubheader} subheader.
*/
long COLUMN_FORMAT_WIDTH_OFFSET = 8L;
long COLUMN_FORMAT_WIDTH_OFFSET = 0L;

/**
* For some table column, the sas7bdat file stores width of format:
Expand All @@ -827,7 +827,7 @@ public interface SasFileConstants {
* (int or long depending on {@link SasFileConstants#ALIGN_2_VALUE}) from
* the beginning of the {@link SasFileParser.FormatAndLabelSubheader} subheader.
*/
long COLUMN_FORMAT_PRECISION_OFFSET = 10L;
long COLUMN_FORMAT_PRECISION_OFFSET = 2L;

/**
* For some table column, the sas7bdat file stores width of format:
Expand Down
4 changes: 2 additions & 2 deletions src/main/java/com/epam/parso/impl/SasFileParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -1558,8 +1558,8 @@ class FormatAndLabelSubheader implements ProcessingSubheader {
@Override
public void processSubheader(long subheaderOffset, long subheaderLength) throws IOException {
int intOrLongLength = sasFileProperties.isU64() ? BYTES_IN_LONG : BYTES_IN_INT;
Long[] offset = {subheaderOffset + COLUMN_FORMAT_WIDTH_OFFSET + intOrLongLength,
subheaderOffset + COLUMN_FORMAT_PRECISION_OFFSET + intOrLongLength,
Long[] offset = {subheaderOffset + COLUMN_FORMAT_WIDTH_OFFSET + 3 * intOrLongLength,
subheaderOffset + COLUMN_FORMAT_PRECISION_OFFSET + 3 * intOrLongLength,
subheaderOffset + COLUMN_FORMAT_TEXT_SUBHEADER_INDEX_OFFSET + 3 * intOrLongLength,
subheaderOffset + COLUMN_FORMAT_OFFSET_OFFSET + 3 * intOrLongLength,
subheaderOffset + COLUMN_FORMAT_LENGTH_OFFSET + 3 * intOrLongLength,
Expand Down
85 changes: 79 additions & 6 deletions src/test/java/com/epam/parso/BugsTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -21,21 +21,94 @@
import org.junit.Test;

import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.List;

import static org.junit.Assert.assertEquals;
import static org.assertj.core.api.Assertions.assertThat;

public class BugsTest {


@Test
public void testOOM() throws Exception {
try (InputStream is = Files.newInputStream(Paths.get(this.getClass().getResource(
"/bugs/mixed_data_one.sas7bdat.oom").toURI()))) {
try (InputStream is = this.getClass().getResourceAsStream("/bugs/mixed_data_one.sas7bdat.oom")) {
SasFileReader sasFileReader = new SasFileReaderImpl(is);
long rowCount = sasFileReader.getSasFileProperties().getRowCount();
assertEquals(0, rowCount);
assertThat(rowCount).isEqualTo(0);
}
}

/**
* See reported comment at https://github.com/epam/parso/issues/54#issue-492745901
*/
@Test
public void testColumnFormat64BitIssue54() throws Exception {
try (InputStream is = this.getClass().getResourceAsStream("/bugs/54-class.sas7bdat")) {
SasFileReader sasFileReader = new SasFileReaderImpl(is);

SasFileProperties properties = sasFileReader.getSasFileProperties();
List<Column> columns = sasFileReader.getColumns();

assertThat(properties.isU64()).isTrue();

assertThat(columns.get(0).getName()).isEqualTo("Name");
assertThat(columns.get(0).getLength()).isEqualTo(8);
assertThat(columns.get(0).getFormat().getWidth()).isEqualTo(0);
assertThat(columns.get(0).getFormat().getPrecision()).isEqualTo(0);

assertThat(columns.get(1).getName()).isEqualTo("Sex");
assertThat(columns.get(1).getLength()).isEqualTo(1);
assertThat(columns.get(1).getFormat().getWidth()).isEqualTo(0);
assertThat(columns.get(1).getFormat().getPrecision()).isEqualTo(0);

assertThat(columns.get(2).getName()).isEqualTo("Age");
assertThat(columns.get(2).getLength()).isEqualTo(8);
assertThat(columns.get(2).getFormat().getWidth()).isEqualTo(4);
assertThat(columns.get(2).getFormat().getPrecision()).isEqualTo(0);

assertThat(columns.get(3).getName()).isEqualTo("Height");
assertThat(columns.get(3).getLength()).isEqualTo(8);
assertThat(columns.get(3).getFormat().getWidth()).isEqualTo(8);
assertThat(columns.get(3).getFormat().getPrecision()).isEqualTo(2);

assertThat(columns.get(4).getName()).isEqualTo("Weight");
assertThat(columns.get(4).getLength()).isEqualTo(8);
assertThat(columns.get(4).getFormat().getWidth()).isEqualTo(0);
assertThat(columns.get(4).getFormat().getPrecision()).isEqualTo(0);
}
}

/**
* See dataset properties at https://github.com/epam/parso/issues/54#issuecomment-737872522
*/
@Test
public void testColumnFormat32BitIssue54() throws Exception {
try (InputStream is = this.getClass().getResourceAsStream("/bugs/54-cookie.sas7bdat")) {
SasFileReader sasFileReader = new SasFileReaderImpl(is);

SasFileProperties properties = sasFileReader.getSasFileProperties();
List<Column> columns = sasFileReader.getColumns();

assertThat(properties.isU64()).isFalse();

assertThat(columns.get(15).getName()).isEqualTo("AROMA");
assertThat(columns.get(15).getLength()).isEqualTo(8);
assertThat(columns.get(15).getFormat().getWidth()).isEqualTo(4);
assertThat(columns.get(15).getFormat().getPrecision()).isEqualTo(1);

assertThat(columns.get(16).getName()).isEqualTo("SWEET");
assertThat(columns.get(16).getLength()).isEqualTo(8);
assertThat(columns.get(16).getFormat().getWidth()).isEqualTo(3);
assertThat(columns.get(16).getFormat().getPrecision()).isEqualTo(1);

assertThat(columns.get(25).getName()).isEqualTo("INSTRON");
assertThat(columns.get(25).getLength()).isEqualTo(4);
assertThat(columns.get(25).getFormat().getWidth()).isEqualTo(0);
assertThat(columns.get(25).getFormat().getPrecision()).isEqualTo(0);

assertThat(columns.get(26).getName()).isEqualTo("L");
assertThat(columns.get(26).getLength()).isEqualTo(8);
assertThat(columns.get(26).getFormat().getWidth()).isEqualTo(5);
assertThat(columns.get(26).getFormat().getPrecision()).isEqualTo(2);
}
}
}
Binary file added src/test/resources/bugs/54-class.sas7bdat
Binary file not shown.
Binary file added src/test/resources/bugs/54-cookie.sas7bdat
Binary file not shown.

0 comments on commit be12344

Please sign in to comment.