Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

#791: core: molecule: Lee-Crippen SMARTS pKa calculation method implemented #792

Merged
merged 15 commits into from
Jul 21, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions api/c/indigo/indigo.h
Original file line number Diff line number Diff line change
Expand Up @@ -687,6 +687,7 @@ CEXPORT int indigoNumHydrogenBondAcceptors(int molecule);
CEXPORT int indigoNumHydrogenBondDonors(int molecule);
CEXPORT double indigoLogP(int molecule);
CEXPORT double indigoMolarRefractivity(int molecule);
CEXPORT double indigoPka(int molecule);

CEXPORT const char* indigoCanonicalSmiles(int molecule);
CEXPORT const char* indigoLayeredCode(int molecule);
Expand Down
15 changes: 15 additions & 0 deletions api/c/indigo/src/indigo_calc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -222,3 +222,18 @@ CEXPORT double indigoMolarRefractivity(const int molecule)
}
INDIGO_END(-1);
}

/// C API entry point: computes the pKa of a molecule.
///
/// Looks up the object behind the handle and, when it is an IndigoMolecule,
/// returns the result of Crippen::pKa for its molecule.
///
/// @param molecule handle of the Indigo object to evaluate
/// @return the value produced by Crippen::pKa
/// @throws IndigoError when the handle does not refer to a molecule
/// NOTE(review): INDIGO_END(-1) presumably converts a thrown error into a -1
/// return value for C callers - confirm against the macro definition.
CEXPORT double indigoPka(const int molecule)
{
    INDIGO_BEGIN
    {
        auto& obj = self.getObject(molecule);
        if (IndigoMolecule::is(obj))
        {
            auto& mol = obj.getMolecule();
            return Crippen::pKa(mol);
        }
        // The message names the pKa calculation; it previously said "logP",
        // a leftover from the logP wrapper this function was modelled on.
        throw IndigoError("incorrect object type for pKa calculation: %s, should be molecule", obj.debugInfo());
    }
    INDIGO_END(-1);
}
3 changes: 3 additions & 0 deletions api/dotnet/src/IndigoLib.cs
Original file line number Diff line number Diff line change
Expand Up @@ -831,6 +831,9 @@ public static extern int indigoSetSGroupBrackets(int sgroup, int brk_style, floa
[DllImport("indigo"), SuppressUnmanagedCodeSecurity]
public static extern double indigoMolarRefractivity(int molecule);

[DllImport("indigo"), SuppressUnmanagedCodeSecurity]
public static extern double indigoPka(int molecule);

[DllImport("indigo"), SuppressUnmanagedCodeSecurity]
public static extern byte* indigoCanonicalSmiles(int molecule);

Expand Down
6 changes: 6 additions & 0 deletions api/dotnet/src/IndigoObject.cs
Original file line number Diff line number Diff line change
Expand Up @@ -1438,6 +1438,12 @@ public double molarRefractivity()
return dispatcher.checkResult(IndigoLib.indigoMolarRefractivity(self));
}

/// <summary>
/// Returns the pKa value computed by the native indigoPka call for this object.
/// </summary>
/// <returns>The native result after it has passed through dispatcher.checkResult.</returns>
public double pKa()
{
    dispatcher.setSessionID();
    double nativeValue = IndigoLib.indigoPka(self);
    return dispatcher.checkResult(nativeValue);
}

public string canonicalSmiles()
{
dispatcher.setSessionID();
Expand Down
2 changes: 2 additions & 0 deletions api/java/indigo/src/main/java/com/epam/indigo/IndigoLib.java
Original file line number Diff line number Diff line change
Expand Up @@ -611,6 +611,8 @@ int indigoSetSGroupBrackets(

double indigoMolarRefractivity(int molecule);

double indigoPka(int molecule);

Pointer indigoCanonicalSmiles(int molecule);

long indigoHash(int item);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -859,6 +859,11 @@ public double molarRefractivity() {
return Indigo.checkResultDouble(this, lib.indigoMolarRefractivity(self));
}

/**
 * Returns the pKa value computed by the native {@code indigoPka} call for this object.
 *
 * @return the native result after it has passed through {@code Indigo.checkResultDouble}
 */
public double pKa() {
    dispatcher.setSessionID();
    final double nativeValue = lib.indigoPka(self);
    return Indigo.checkResultDouble(this, nativeValue);
}

public String canonicalSmiles() {
dispatcher.setSessionID();
return Indigo.checkResultString(this, lib.indigoCanonicalSmiles(self));
Expand Down
13 changes: 13 additions & 0 deletions api/python/indigo/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2623,6 +2623,17 @@ def molarRefractivity(self):
Indigo._lib.indigoMolarRefractivity(self.id)
)

def pKa(self):
    """Molecule method: calculates the Lee-Crippen SMARTS pKa value

    Returns:
        float: calculated pKa value of the molecule
    """
    self.dispatcher._setSessionId()
    # Call the native library first, then let the dispatcher validate the result.
    native_value = Indigo._lib.indigoPka(self.id)
    return self.dispatcher._checkResultFloat(native_value)

def bondOrder(self):
"""Bond method returns bond order

Expand Down Expand Up @@ -5106,6 +5117,8 @@ def __init__(self, path=None):
Indigo._lib.indigoLogP.argtypes = [c_int]
Indigo._lib.indigoMolarRefractivity.restype = c_double
Indigo._lib.indigoMolarRefractivity.argtypes = [c_int]
Indigo._lib.indigoPka.restype = c_double
Indigo._lib.indigoPka.argtypes = [c_int]
Indigo._lib.indigoCanonicalSmiles.restype = c_char_p
Indigo._lib.indigoCanonicalSmiles.argtypes = [c_int]
Indigo._lib.indigoCanonicalSmarts.restype = c_char_p
Expand Down
2 changes: 1 addition & 1 deletion api/tests/integration/ref/basic/basic_load.py.out
Original file line number Diff line number Diff line change
Expand Up @@ -1803,4 +1803,4 @@ M END
molfile loader: unsupported property of CTAB3000 (in BOND block): UNKNOWN_KEY
molfile loader: unsupported property of CTAB3000: UNKNOWN_KEY
****** Name is skeletal prefix ********
molecule auto loader: SMILES loader: unrecognized lowercase symbol: i
molecule auto loader: SMILES loader: unrecognized lowercase symbol: l
6 changes: 6 additions & 0 deletions api/tests/integration/tests/calc/crippen.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,12 @@ def test_mr():
check_float("molarRefractivity", "Clc1ccccc1", 31.45)


def test_pka():
    """Runs check_float for the "pKa" method on each reference SMILES.

    Each entry pairs a SMILES string with the pKa value passed to
    check_float as the expected result.
    """
    reference_cases = (
        ("Cc1cc(O)cc(c1)[N+](C)(C)C", 8.1999998),
        ("Cc1ccc(cc1)C1C[NH2+]1", 9.53),
        ("O=C(NBr)C(Cl)Cl", 4.1549997),
    )
    for smiles, expected_pka in reference_cases:
        check_float("pKa", smiles, expected_pka)


if __name__ == "__main__":
indigo = Indigo()
test_logp()
Expand Down
95 changes: 95 additions & 0 deletions core/indigo-core/common/base_cpp/csv_reader.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
/****************************************************************************
* Copyright (C) from 2009 to Present EPAM Systems.
*
* This file is part of Indigo toolkit.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
***************************************************************************/

#include "csv_reader.h"

using namespace indigo;

std::vector<std::string> CSVReader::readCSVRow(const std::string& row)
{
CSVState state = CSVState::UnquotedField;
std::vector<std::string> fields{""};
size_t i = 0; // index of the current field
for (char c : row)
{
switch (state)
{
case CSVState::UnquotedField:
switch (c)
{
case ',': // end of field
fields.push_back("");
i++;
break;
case '"':
state = CSVState::QuotedField;
break;
default:
fields[i].push_back(c);
break;
}
break;
case CSVState::QuotedField:
switch (c)
{
case '"':
state = CSVState::QuotedQuote;
break;
default:
fields[i].push_back(c);
break;
}
break;
case CSVState::QuotedQuote:
switch (c)
{
case ',': // , after closing quote
fields.push_back("");
i++;
state = CSVState::UnquotedField;
break;
case '"': // "" -> "
fields[i].push_back('"');
state = CSVState::QuotedField;
break;
default: // end of quote
state = CSVState::UnquotedField;
break;
}
break;
}
}
return fields;
}

/// Reads every line available from a stream and parses each one as a CSV row.
///
/// @param in input stream to consume; reading stops at end of input or on
///           the first failed line read
/// @return one vector of fields per line read, in input order
std::vector<std::vector<std::string>> CSVReader::readCSV(std::istream& in)
{
    std::vector<std::vector<std::string>> table;
    std::string row;
    // Using getline as the loop condition replaces the fragile
    // `while (!in.eof())` pattern: the loop ends as soon as a read fails,
    // so no stale or partial line is ever parsed.
    while (std::getline(in, row))
    {
        // The parsed row is a temporary, so it is moved into the table
        // rather than copied.
        table.push_back(readCSVRow(row));
    }
    return table;
}
40 changes: 40 additions & 0 deletions core/indigo-core/common/base_cpp/csv_reader.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
/****************************************************************************
* Copyright (C) from 2009 to Present EPAM Systems.
*
* This file is part of Indigo toolkit.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
***************************************************************************/

#pragma once

#include <istream>
#include <string>
#include <vector>

namespace indigo
{
    /// Parser states used while scanning a CSV row character by character.
    enum class CSVState
    {
        UnquotedField, ///< outside any quoted section
        QuotedField,   ///< inside a quoted section
        QuotedQuote    ///< just saw a quote while inside a quoted section
    };

    /// Stateless helper exposing static functions for reading CSV data.
    class CSVReader
    {
    public:
        /// Splits a single line of CSV text into its fields.
        static std::vector<std::string> readCSVRow(const std::string& row);

        /// Reads lines from a stream and returns one vector of fields per line.
        static std::vector<std::vector<std::string>> readCSV(std::istream& in);
    };
} // namespace indigo
4 changes: 2 additions & 2 deletions core/indigo-core/molecule/base_molecule.h
Original file line number Diff line number Diff line change
Expand Up @@ -189,8 +189,8 @@ namespace indigo

virtual bool isSaturatedAtom(int idx) = 0;

virtual int getBondOrder(int idx) = 0; // > 0 -- BOND_***, -1 -- not sure
virtual int getBondTopology(int idx) = 0; // > 0 -- TOPOLOGY_***, -1 -- not sure
virtual int getBondOrder(int idx) const = 0; // > 0 -- BOND_***, -1 -- not sure
virtual int getBondTopology(int idx) = 0; // > 0 -- TOPOLOGY_***, -1 -- not sure

// true if the atom number belongs to the given list, false otherwise
virtual bool atomNumberBelongs(int idx, const int* numbers, int count) = 0;
Expand Down
1 change: 1 addition & 0 deletions core/indigo-core/molecule/crippen.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,5 +27,6 @@ namespace indigo
public:
static double logP(Molecule& molecule);
static double molarRefractivity(Molecule& molecule);
static double pKa(Molecule& molecule);
};
}
3 changes: 2 additions & 1 deletion core/indigo-core/molecule/molecule.h
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ namespace indigo
int getAtomCharge(int idx) override;
int getAtomIsotope(int idx) override;
int getAtomRadical(int idx) override;
int getBondOrder(int idx) override;
int getBondOrder(int idx) const override;
int getBondTopology(int idx) override;
int getAtomAromaticity(int idx) override;
int getExplicitValence(int idx) override;
Expand Down Expand Up @@ -160,6 +160,7 @@ namespace indigo
// Check
bool isNitrogenV5(int atom_index);
bool isNitrogenV5ForConnectivity(int atom_index, int conn);
bool isPiBonded(int atom_index) const;

void invalidateAtom(int index, int mask) override;

Expand Down
2 changes: 1 addition & 1 deletion core/indigo-core/molecule/molecule_layered_molecules.h
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ namespace indigo
dword getRSiteBits(int atom_idx) override;
void allowRGroupOnRSite(int atom_idx, int rg_idx) override;

int getBondOrder(int idx) override;
int getBondOrder(int idx) const override;
int getBondTopology(int idx) override;

bool atomNumberBelongs(int idx, const int* numbers, int count) override;
Expand Down
2 changes: 1 addition & 1 deletion core/indigo-core/molecule/molecule_tautomer.h
Original file line number Diff line number Diff line change
Expand Up @@ -268,7 +268,7 @@ namespace indigo

void clear() override;

int getBondOrder(int idx) override;
int getBondOrder(int idx) const override;
int getBondTopology(int idx) override;
bool possibleBondOrder(int idx, int order) override;

Expand Down
13 changes: 7 additions & 6 deletions core/indigo-core/molecule/query_molecule.h
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ namespace indigo
ATOM_TEMPLATE,
ATOM_TEMPLATE_SEQID,
ATOM_TEMPLATE_CLASS,
ATOM_PI_BONDED,

BOND_ORDER,
BOND_TOPOLOGY,
Expand Down Expand Up @@ -126,8 +127,8 @@ namespace indigo
// Remove all constraints of the given type
void removeConstraints(int what_type);

bool sureValue(int what_type, int& value);
bool sureValueInv(int what_type, int& value);
bool sureValue(int what_type, int& value) const;
bool sureValueInv(int what_type, int& value) const;
bool possibleValue(int what_type, int what_value);
bool possibleValueInv(int what_type, int what_value);
bool possibleValuePair(int what_type1, int what_value1, int what_type2, int what_value2);
Expand All @@ -153,7 +154,7 @@ namespace indigo

Node* _findSureConstraint(int what_type, int& count);

virtual bool _sureValue(int what_type, int& value_out) = 0;
virtual bool _sureValue(int what_type, int& value_out) const = 0;
virtual bool _sureValueBelongs(int what_type, const int* arr, int count) = 0;

virtual void _optimize(){};
Expand Down Expand Up @@ -209,7 +210,7 @@ namespace indigo

bool _possibleValue(int what_type, int what_value) override;
bool _possibleValuePair(int what_type1, int what_value1, int what_type2, int what_value2) override;
bool _sureValue(int what_type, int& value_out) override;
bool _sureValue(int what_type, int& value_out) const override;
bool _sureValueBelongs(int what_type, const int* arr, int count) override;

void _optimize() override;
Expand Down Expand Up @@ -244,7 +245,7 @@ namespace indigo

bool _possibleValue(int what_type, int what_value) override;
bool _possibleValuePair(int what_type1, int what_value1, int what_type2, int what_value2) override;
bool _sureValue(int what_type, int& value_out) override;
bool _sureValue(int what_type, int& value_out) const override;
bool _sureValueBelongs(int what_type, const int* arr, int count) override;
};

Expand Down Expand Up @@ -288,7 +289,7 @@ namespace indigo

bool isSaturatedAtom(int idx) override;

int getBondOrder(int idx) override;
int getBondOrder(int idx) const override;
int getBondTopology(int idx) override;
bool atomNumberBelongs(int idx, const int* numbers, int count) override;
bool possibleAtomNumber(int idx, int number) override;
Expand Down
Loading