{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Import all the necessary libraries" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", "import pickle\n", "from rdkit.Chem import AllChem\n", "from rdkit.Chem.rdMolDescriptors import GetMACCSKeysFingerprint\n", "from rdkit import DataStructs" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Processing the data to be predicted\n", "The code below demonstrates how to process the SMILES strings in an xlsx file. You can download the file above." ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | SMILES | \n", "Time | \n", "Guideline | \n", "Principle | \n", "Endpoint | \n", "Reliability | \n", "
---|---|---|---|---|---|---|
0 | \n", "CC1(C)OC[C@@H](CC(OC(=O)[O-])C2c3ccccc3-c3cccc... | \n", "14 | \n", "OECD 301A | \n", "DOC die away | \n", "Ready | \n", "1 | \n", "
1 | \n", "CC1(C)OC[C@@H](CC=O)O1 | \n", "28 | \n", "OECD 301A | \n", "DOC die away | \n", "Ready | \n", "1 | \n", "
2 | \n", "CC1(C)OC[C@@H](CCCO)O1 | \n", "28 | \n", "OECD 301A | \n", "DOC die away | \n", "Ready | \n", "1 | \n", "
3 | \n", "CC1(C)OC[C@@H](CCI)O1 | \n", "28 | \n", "OECD 301A | \n", "DOC die away | \n", "Ready | \n", "1 | \n", "
4 | \n", "CC1(C)OC[C@@H](CCO)O1 | \n", "28 | \n", "OECD 301A | \n", "DOC die away | \n", "Ready | \n", "1 | \n", "
\n", " | fp | \n", "Time | \n", "Guideline | \n", "Principle | \n", "Endpoint | \n", "Reliability | \n", "
---|---|---|---|---|---|---|
0 | \n", "[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... | \n", "14 | \n", "OECD 301A | \n", "DOC die away | \n", "Ready | \n", "1 | \n", "
1 | \n", "[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... | \n", "28 | \n", "OECD 301A | \n", "DOC die away | \n", "Ready | \n", "1 | \n", "
2 | \n", "[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... | \n", "28 | \n", "OECD 301A | \n", "DOC die away | \n", "Ready | \n", "1 | \n", "
3 | \n", "[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... | \n", "28 | \n", "OECD 301A | \n", "DOC die away | \n", "Ready | \n", "1 | \n", "
4 | \n", "[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... | \n", "28 | \n", "OECD 301A | \n", "DOC die away | \n", "Ready | \n", "1 | \n", "
\n", " | fp | \n", "Time | \n", "Guideline | \n", "Principle | \n", "Endpoint | \n", "Reliability | \n", "
---|---|---|---|---|---|---|
0 | \n", "[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... | \n", "14 | \n", "4 | \n", "0 | \n", "0 | \n", "1 | \n", "
1 | \n", "[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... | \n", "28 | \n", "4 | \n", "0 | \n", "0 | \n", "1 | \n", "
2 | \n", "[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... | \n", "28 | \n", "4 | \n", "0 | \n", "0 | \n", "1 | \n", "
3 | \n", "[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... | \n", "28 | \n", "4 | \n", "0 | \n", "0 | \n", "1 | \n", "
4 | \n", "[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... | \n", "28 | \n", "4 | \n", "0 | \n", "0 | \n", "1 | \n", "
\n", " | SMILES | \n", "Time | \n", "Guideline | \n", "Principle | \n", "Endpoint | \n", "Reliability | \n", "Prediction | \n", "Similarity | \n", "Expected prediction R2 | \n", "Expected prediction RMSE | \n", "
---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "CC1(C)OC[C@@H](CC(OC(=O)[O-])C2c3ccccc3-c3cccc... | \n", "14 | \n", "OECD 301A | \n", "DOC die away | \n", "Ready | \n", "1 | \n", "40.6% | \n", "0.64 | \n", "0.44 | \n", "0.26 | \n", "
1 | \n", "CC1(C)OC[C@@H](CC=O)O1 | \n", "28 | \n", "OECD 301A | \n", "DOC die away | \n", "Ready | \n", "1 | \n", "52.5% | \n", "0.83 | \n", "0.66 | \n", "0.21 | \n", "
2 | \n", "CC1(C)OC[C@@H](CCCO)O1 | \n", "28 | \n", "OECD 301A | \n", "DOC die away | \n", "Ready | \n", "1 | \n", "48.9% | \n", "0.84 | \n", "0.66 | \n", "0.21 | \n", "
3 | \n", "CC1(C)OC[C@@H](CCI)O1 | \n", "28 | \n", "OECD 301A | \n", "DOC die away | \n", "Ready | \n", "1 | \n", "31.5% | \n", "0.74 | \n", "0.59 | \n", "0.23 | \n", "
4 | \n", "CC1(C)OC[C@@H](CCO)O1 | \n", "28 | \n", "OECD 301A | \n", "DOC die away | \n", "Ready | \n", "1 | \n", "40.6% | \n", "0.82 | \n", "0.66 | \n", "0.21 | \n", "