Module:CineMol/parsers

From Wikipedia, the free encyclopedia
Jump to navigation Jump to search
-- This is a port of CineMol to Lua.
-- CineMol (https://github.com/moltools/CineMol) was written by David Meijer, Marnix H. Medema & Justin J. J. van der Hooft and is MIT licensed.
-- Please consider any edits made to this page as dual-licensed under MIT and CC-BY-SA 4.0.

-- This module contains functions for parsing molecular file formats.
-- Export table for this module; the parser functions are attached to it and
-- it is returned at the end of the file.
local p = {}

-- Atom and Bond are taken from the CineMol API module and are called by the
-- parser to build the records it returns.
local api = require( 'Module:CineMol/api' )
local Atom = api.Atom
local Bond = api.Bond

--- Copy a contiguous index range of a sequence into a new table.
-- The upper bound is clamped to the length of the source, and slots that
-- hold nil (for example index 0) are skipped rather than copied.
-- @param source table to copy from
-- @param first  first index to copy (inclusive)
-- @param last   last index to copy (inclusive)
-- @return a new sequence holding the copied values
local function subtable( source, first, last )
	local size = #source
	if last > size then
		last = size
	end

	local slice, count = {}, 0
	for index = first, last do
		local value = source[index]
		-- Assigning nil would not grow the result, so only count real values.
		if value ~= nil then
			count = count + 1
			slice[count] = value
		end
	end
	return slice
end

--- Parse the first molecule of an SDF string into atoms and bonds.
--
-- The text is read with a fixed-column layout: line 4 is the counts line
-- (atom count in columns 1-3, bond count in columns 4-6), followed by one
-- line per atom (x, y, z in columns 1-10, 11-20, 21-30 and the element
-- symbol in columns 32-34) and then one line per bond (start index, stop
-- index and bond order in columns 1-3, 4-6 and 7-9).
--
-- @param src        string holding the SDF text
-- @param include_hs boolean or nil; when false, atoms whose symbol is "H"
--                   and every bond touching such an atom are dropped.
--                   Defaults to true when nil.
-- @return atoms     sequence of Atom( index, symbol, { x, y, z } ) values
-- @return bonds     sequence of Bond( start_index, stop_index, order ) values
--
-- Raises an error (via assert) when an argument has the wrong type, when the
-- counts line is missing, when a numeric field or atom symbol cannot be read,
-- or, while filtering hydrogens, when a bond names an atom that was not parsed.
--
-- NOTE(review): the hydrogen filter relies on Atom values exposing `.index`
-- and `.symbol` and on Bond values exposing `.start_index` and `.end_index`;
-- those types live in Module:CineMol/api and are not visible here.
function p.parse_sdf(src, include_hs)
	assert( type( src ) == 'string', 'First argument should be string' )
	if include_hs == nil then
		include_hs = true
	end
	assert( type( include_hs ) == 'boolean', 'include_hs should be bool or nil' )

	local lines = mw.text.split( src, "\n", true )

	-- Counts line of the first molecule in the SDF file. Check that it exists
	-- so short input fails with a readable message instead of inside string.sub.
	local counts_line = lines[4]
	assert( counts_line ~= nil, 'Could not read counts line. Expected at least 4 lines got ' .. #lines )

	local atom_count_field = string.sub( counts_line, 1, 3 )
	local atom_count = tonumber( atom_count_field )
	assert( atom_count ~= nil, 'Could not read atom_count. Expected number got ' .. atom_count_field )

	local bond_count_field = string.sub( counts_line, 4, 6 )
	local bond_count = tonumber( bond_count_field )
	assert( bond_count ~= nil, 'Could not read bond_count. Expected number got ' .. bond_count_field )

	local atom_lines = subtable( lines, 5, 4 + atom_count )
	local bond_lines = subtable( lines, 5 + atom_count, 4 + atom_count + bond_count )

	-- Parse atom lines. The 1-based position in the atom block is the atom index.
	local atoms = {}
	for atom_index, atom_line in ipairs( atom_lines ) do
		local x_field = string.sub( atom_line, 1, 10 )
		local x = tonumber( x_field )
		assert( x ~= nil, "Cannot read x coord of " .. atom_index .. " expected number got " .. x_field )

		local y_field = string.sub( atom_line, 11, 20 )
		local y = tonumber( y_field )
		assert( y ~= nil, "Cannot read y coord of " .. atom_index .. " expected number got " .. y_field )

		local z_field = string.sub( atom_line, 21, 30 )
		local z = tonumber( z_field )
		assert( z ~= nil, "Cannot read z coord of " .. atom_index .. " expected number got " .. z_field )

		local atom_symbol = mw.text.trim( string.sub( atom_line, 32, 34 ) )
		assert( atom_symbol ~= '', 'Cannot read atom symbol of ' .. atom_index )

		atoms[#atoms + 1] = Atom( atom_index, atom_symbol, { x, y, z } )
	end

	-- Parse bond lines.
	local bonds = {}
	for i, bond_line in ipairs( bond_lines ) do
		local start_field = string.sub( bond_line, 1, 3 )
		local stop_field = string.sub( bond_line, 4, 6 )
		local order_field = string.sub( bond_line, 7, 9 )

		local start_index = tonumber( start_field )
		local stop_index = tonumber( stop_field )
		local bond_order = tonumber( order_field )
		assert( start_index ~= nil, "Cannot read start_index " .. i .. " expected number got " .. start_field )
		assert( stop_index ~= nil, "Cannot read stop_index " .. i .. " expected number got " .. stop_field )
		assert( bond_order ~= nil, "Cannot read bond_order " .. i .. " expected number got " .. order_field )

		bonds[#bonds + 1] = Bond( start_index, stop_index, bond_order )
	end

	if not include_hs then
		-- The lookup is only needed for filtering, so build it only here.
		local atom_by_index = {}
		for _, atom in ipairs( atoms ) do
			atom_by_index[atom.index] = atom
		end

		local heavy_atoms = {}
		for _, atom in ipairs( atoms ) do
			if atom.symbol ~= 'H' then
				heavy_atoms[#heavy_atoms + 1] = atom
			end
		end
		atoms = heavy_atoms

		local heavy_bonds = {}
		for i, bond in ipairs( bonds ) do
			local start_atom = atom_by_index[bond.start_index]
			local end_atom = atom_by_index[bond.end_index]
			-- A bond naming an unparsed atom used to fail with a bare
			-- "attempt to index nil"; report which bond is at fault instead.
			assert( start_atom ~= nil, 'Bond ' .. i .. ' refers to unknown start atom ' .. tostring( bond.start_index ) )
			assert( end_atom ~= nil, 'Bond ' .. i .. ' refers to unknown end atom ' .. tostring( bond.end_index ) )

			if start_atom.symbol ~= "H" and end_atom.symbol ~= "H" then
				heavy_bonds[#heavy_bonds + 1] = bond
			end
		end
		bonds = heavy_bonds
	end

	return atoms, bonds
end

return p