// // clusterconf.svl Clustering Tool for Conformers // // 22-jul-2015 (ti) db_RMSD bug fixed: if no atoms matches query SMILES, skip this entry // 12-nov-2013 (ao) replace cattok with tok_cat // 13-jul-2010 (ad) fixed panel database open bug // 09-jul-2009 (na) added argument for batch // 12-nov-2008 (na) patched for unassigned Cluster for the same conformer with center of cluster // 14-feb-2008 (sn) bug-fixed for terminalatoms || mask_list // 28-dec-2007 (sn) Add sd_RMSD function // 09-feb-2005 (rk) bug-fixed for terminalatoms || mask_list // 09-feb-2005 (rk) replaced dbv_ViewKeys with dbv_DefaultView terminalatoms // 26-aug-2003 (jg) revised for symmetry handling // 24-aug-2001 (jg) bug-fixed and add 'method' option // 14-aug-2001 (jg) Created // // COPYRIGHT (C) 2001-2017 MOLSIS INC. ALL RIGHTS RESERVED. // // PERMISSION TO USE, COPY, MODIFY AND DISTRIBUTE THIS SOFTWARE IS HEREBY // GRANTED PROVIDED THAT: (1) UNMODIFIED OR FUNCTIONALLY EQUIVALENT CODE // DERIVED FROM THIS SOFTWARE MUST CONTAIN THIS NOTICE; (2) ALL CODE DERIVED // FROM THIS SOFTWARE MUST ACKNOWLEDGE THE AUTHOR(S) AND INSTITUTION(S); (3) // THE NAMES OF THE AUTHOR(S) AND INSTITUTION(S) NOT BE USED IN ADVERTISING // OR PUBLICITY PERTAINING TO THE DISTRIBUTION OF THE SOFTWARE WITHOUT // SPECIFIC, WRITTEN PRIOR PERMISSION; (4) ALL CODE DERIVED FROM THIS SOFTWARE // BE EXECUTED WITH THE MOLECULAR OPERATING ENVIRONMENT (MOE) LICENSED FROM // MOLSIS INC. // // MOLSIS INC. DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS // SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, // AND IN NO EVENT SHALL MOLSIS INC. BE LIABLE FOR ANY // SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER // RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF // CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN // CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. // #set title 'Conformer Clustering' #set class 'MOE:interactive' #set version 'RSI 2015.07.22' #set main 'ClusterConf' function db_ImportSD; function Superpose; function RenderStick; const CCG_VER = 0; //--------------------------------------------------------------------- // GUI //--------------------------------------------------------------------- const panel = [ title: 'Conformer Clustering', name: 'confcluster', windowName: 'Conformer Clustering', text: ['Clustering','Cancel'], // Shell button onTrigger: ['return','exit'], Hbox:[ Option:[title:'Conformer File:', name: 'mdbfile', onTrigger: 'return'], Button:[name:'browse', text:'Open',font:'mediumBold', onTrigger:'return'] ], Option:[ title:'Mol Field:', name: 'molfield' ], Text:[ title:'RMSD:', name:'rmsd', len:4, sensitive: 1, type : 'real' ], Radio:[ name:'method', title:'Method:', options:['Minimize Cluster','Maximize Cluster'], sensitive:1, bubbleHelp:['Minimize Cluster:Each entry belongs to the cluster\n' ' with the nearest cluster center.\n' 'Maximize Cluster:Make largest cluster.'] ], Separator:[], Hbox:[ Option:[ name:'display', title : 'Display Conformers:', titleFont:'medium', options : ['ALL','NONE'], sensitive:0, onTrigger: ['return'] ], Text:[ name:'n_conf', len:5, sensitive: 0, type : 'int' ], Checkbox:[ name:'heavyatoms', title:' Heavy Atoms Only', titleFont:'medium', sensitive:0, onTrigger:'return'] ], Radio:[ name:'color', title:'Color:', titleFont:'medium', options:['Cluster','Element'], sensitive:0, onTrigger: ['return'] ], Radio:[ name:'center', title:'Cluster Center:', titleFont:'medium', options:['Only','Stick','Select'], sensitive:0, onTrigger: ['return'] ] ]; local function atom_pairs atoms local n = length atoms; local idx = igen n; local i2 = cat apt rep [ idx, dec idx ]; local i1 = cat app igen dec idx; return apt get [[atoms],tr [i1,i2]]; endfunction local function _calc_rmsd [a_pos,b] local idx = aPrioZQH b; local equalatoms = [b] || (igen maxE idx == [idx]); equalatoms = equalatoms | (app length equalatoms > 1); local n_equalatoms = app length equalatoms; local j,k,c,pos,rmsd,rmsd0,m,n,check; rmsd0 = sqrt((add cat sqr (a_pos - aPos b))/ length b); for j = 1, length n_equalatoms loop c = equalatoms(j); local pairs = atom_pairs c; local terminalatoms = (c | (aHeavyValence c == 1)); // terminalatoms = terminalatoms || apt eqL [aBonds terminalatoms, uniq aBonds terminalatoms]; //(sn) if length (aBonds terminalatoms) == length (uniq aBonds terminalatoms) then terminalatoms = terminalatoms || apt eqL [aBonds terminalatoms, uniq aBonds terminalatoms]; else terminalatoms = []; endif pairs = cat [pairs,terminalatoms, app reverse terminalatoms]; loop for k = 1, length pairs loop pos = aPos pairs(k); aSetPos [rotl pairs(k), pos]; rmsd(k) = sqrt((add cat sqr (a_pos - aPos b))/ length b); aSetPos [pairs(k), pos]; endloop if minE rmsd < rmsd0 then rmsd0 = minE rmsd; m = indexof [rmsd0, rmsd]; aSetPos [rotl pairs(m), aPos pairs(m)]; check = 1; else check = 0; endif until check === 0 endloop endloop return rmsd0; endfunction //--------------------------------------------------------------------- // Clustering main code //--------------------------------------------------------------------- #if CCG_VER function conf_NormalizeAtoms,conf_ExpandAutomorphisms; const COMPARATORS = tr [ ['max', 'x_max'], ['min', 'x_min'] ]; local function ConfClustering [mdbfile, molfield, ent, rmsd, method, usecompf, efield, maxormin] apt db_EnsureField [mdbfile, ['Cluster', 'Center', 'RMSD'], ['int', 'int', 'float']]; local n = length ent; local comp_fcn = get [ COMPARATORS(2), indexof [maxormin, COMPARATORS(1)] ]; local i, j, mol, chains, atoms,pos,ConfDistMatrix, E; local entries = ent; local dbmol=[], confs=[]; for i = 1,n loop mol = first db_ReadFields [mdbfile, ent(i), molfield]; if usecompf then E(i) = cat db_ReadFields [mdbfile, ent(i), efield]; endif chains = mol_Create mol; atoms = cat cAtoms chains; pos(i) = aPos atoms; conf_NormalizeAtoms atoms; aSetForceRS [atoms, 0]; local hatoms = atoms | aAtomicNumber atoms > 1; dbmol(i) = mol_Extract hatoms; confs(i) = first conf_ExpandAutomorphisms [ dbmol(i), nest mol_aPos dbmol(i), 0 ]; oDestroy chains; endloop local atomcount = length first first pos; // calculate RMSD matrix for all combination of conformers ConfDistMatrix=[]; local x,y; for y = 1, n loop for x = 1, dec y loop local rmsds=[]; for i = 1, length confs(x) loop for j = 1, length confs(y) loop rmsds(i)(j) = first Superpose [ [confs(x)(i), confs(y)(j)] ]; endloop endloop ConfDistMatrix(x)(y) = sqrt min cat cat rmsds; ConfDistMatrix(y)(x) = ConfDistMatrix(x)(y); endloop ConfDistMatrix(y)(y) = 0; endloop // find all pairs within RMSD threashold local f_RMSD_Matrix = ConfDistMatrix < rmsd; i = 1; local orig_E = E; local orig_CMD = ConfDistMatrix; local ClusterCenter,ClusterConfList,f_Entry,f_Cluster; // get cluster center and entry list in each cluster while length f_RMSD_Matrix loop f_Entry = app add f_RMSD_Matrix; if usecompf then f_Entry = E; endif // find an entry that defines a cluster center f_Entry = put [ rep[0,length f_RMSD_Matrix], call [comp_fcn, f_Entry], 1 ]; ClusterCenter(i) = ent|f_Entry; // get conformers in the cluster ClusterConfList(i) = ent|cat (f_RMSD_Matrix|f_Entry); // strip entries in the cluster above from original entry list // do same process for f_RMSD_Matrix f_Cluster = not cat (f_RMSD_Matrix|f_Entry); ent = ent | f_Cluster; if usecompf then E = E | f_Cluster; endif f_RMSD_Matrix = f_RMSD_Matrix||[f_Cluster]; f_RMSD_Matrix = f_RMSD_Matrix|f_Cluster; i = i + 1; endloop // get RMSD from each Cluster Center ent = entries; ConfDistMatrix = ConfDistMatrix|indexof [ent, ClusterCenter]; ConfDistMatrix = perm [ConfDistMatrix,pack indexof [ent, ClusterCenter]]; ConfDistMatrix = apt mput [ConfDistMatrix,(ConfDistMatrix >= rmsd), 0]; //------------------------------------------------------------------- // Remove double-counted entry and assign only one center for each entry // pr ConfDistMatrix; pr n; local function RMSD_clusterclean[ConfDistMatrix, n, method] local i; ConfDistMatrix = tr ConfDistMatrix; local dist_list,ent_idx; if method === 'Minimize Cluster' then for i = 1,n loop dist_list = ConfDistMatrix(i)|ConfDistMatrix(i)>0; if length dist_list > 0 then ent_idx = first ( (indexof [dist_list,ConfDistMatrix(i)]) | dist_list == minE dist_list ); ConfDistMatrix(i) = rep [0,length ConfDistMatrix(i)]; ConfDistMatrix(i)(ent_idx) = minE dist_list; endif endloop else for i = 1,n loop dist_list = ConfDistMatrix(i)|ConfDistMatrix(i)>0; if length dist_list > 0 then ent_idx = indexof [first dist_list,ConfDistMatrix(i)]; ConfDistMatrix(i) = rep [0,length ConfDistMatrix(i)]; ConfDistMatrix(i)(ent_idx) = first dist_list; endif endloop endif ConfDistMatrix = tr ConfDistMatrix; return ConfDistMatrix; endfunction ConfDistMatrix = RMSD_clusterclean[ConfDistMatrix, n, method]; // unify the enties for each cluster. At this stage, if we are using // a float field rather than RMSD to define the cluster centres, we // need to move the cluster centre. // SHOULD WE THEN LOOP BACK AND RECALCULATE CLUSTER MEMBERS? for i = 1, length ClusterCenter loop ClusterConfList(i) = cat [ClusterCenter(i), ent | ConfDistMatrix(i)]; if usecompf then local ClusterE = orig_E | indexof [ ent, cat [ClusterCenter(i), ClusterConfList(i)] ]; ClusterE = perm [ ClusterE, pack indexof [ent, ClusterConfList(i)] ]; ClusterCenter(i) = get [ ClusterConfList(i), call [comp_fcn, ClusterE] ]; endif endloop // Moving the cluster centre means we have to reaquire the distance // matrix data for the output RMSD field if usecompf then orig_CMD = orig_CMD|indexof [ent, ClusterCenter]; orig_CMD = tr perm [orig_CMD,pack indexof [ent, ClusterCenter]]; for i = 1, n loop ConfDistMatrix(i) = mget [ orig_CMD(i), (apt indexof [ent(i), ClusterConfList]) ]; endloop else ConfDistMatrix = add ConfDistMatrix; endif //----------------------------------------------------------- // output data for each entry local k; print '------------------------'; write ['{t:} {n:}\n' , ['RMSD = ', rmsd]]; for i = 1, length ClusterCenter loop for j = 1,length ClusterConfList(i) loop if iadd (ClusterCenter == ClusterConfList(i)(j)) then k = 1; else k = 0; endif db_Write [mdbfile, ClusterConfList(i)(j), tag[['Cluster','Center'],[i,k]]]; endloop write [ '{t:} {n:}\n' , [ 'Cluster_', token swrite [ '{c:}',i ], ' = ', length ClusterConfList(i) ] ]; endloop for i = 1,n loop db_Write [mdbfile, ent(i), tag['RMSD',ConfDistMatrix(i)]]; endloop return [ClusterCenter,ClusterConfList]; endfunction ////// // db_conf_cluster.svl Cluster conformations of a database of molecules // // 19-aug-2015 (fk) singleton clusters also assigned // 05-jun-2007 (sg) added an option to select cluster centre based on float // 14-dec-2006 (ah) created based on clusterconf.svl & db_conf_cluster.svl // 04-dec-2002 (db) use Superpose to compute RMSD // 10-oct-2002 (db) fixed the vector of wrong length bug // 20-jan-2002 (cw) added comments...included with CCG support local function check_fields [mdb, mseq, E] local [fieldnames, fieldtypes] = db_Fields mdb; local molfieldnames = fieldnames | (fieldtypes == 'molecule'); [fieldnames, fieldtypes] = [fieldnames, fieldtypes] || [not (fieldtypes == 'molecule')]; if isnull mseq then if anytrue (fieldnames == 'mseq') then mseq = 'mseq'; else mseq = first fieldnames; endif elseif allfalse (fieldnames == mseq) then mseq = first fieldnames; endif if isnull E then if anytrue (fieldnames == 'E') then E = 'E'; else E = first (fieldnames | fieldtypes == 'float'); endif elseif allfalse (fieldnames == E) then E = first (fieldnames | fieldtypes == 'float'); endif return [mseq, E, fieldnames, molfieldnames]; endfunction global function db_conf_cluster [mdb, mseq, efield, method, rmsd] if MOE_BATCH and anytrue app isnull argument[] then exit 'Usage db_conf_cluster [mdb, mseq, method, rmsd]'; endif if istrue indexof [storage mdb, ['tok', 'int']] then mdb = db_Open [mdb, 'read-write']; else mdb = dbv_DefaultView []; endif local mdbfile = db_Filename mdb; local fieldnames, molfieldnames, ents, data, m, opts, mfield; [mseq, efield, fieldnames, molfieldnames] = check_fields [mdb, mseq, efield]; const PANEL = [ title: 'Database Conformer Clustering', name: 'db_conf_cluster', windowName: 'Database Conformer Clustering', text: ['OK','Cancel'], // Shell button onTrigger: ['return','exit'], Hbox: [ name:'box1', extendH: 1, FSBText:[ name: 'mdb', title: 'Database', bubbleHelp: 'Database for conformation clustering.', font: 'mediumBold', len:30, onTrigger: 'return' ], Button: [ name: 'browse', text: 'Browse...', font: 'mediumBold', onTrigger: 'return' ] ], Hbox: [ name:'box2', Option : [ name: 'mfield', title: 'Molecule Field', font: 'mediumBold', bubbleHelp: 'Molecule Field' ], Option : [ name: 'mseq', title: 'Molecule Identifier', font: 'mediumBold', bubbleHelp: 'Field to use for compound identifiers' ] ], Hbox: [ name:'box3', Checkbox:[ title: 'Comparison Field', name:'usecompf', bubbleHelp: 'Use a specific float field to\n' 'define cluster centres, rather\n' 'than cluster size. If disabled,\n' 'default comparison function is max\n' 'cluster size.' ], Option : [ name: 'efield', font: 'mediumBold', bubbleHelp: 'Specify comparison Field' ], Option : [ name: 'maxormin', title: 'Comparison Function', font: 'mediumBold', text:['min', 'max'], bubbleHelp: 'High or low values form\n' 'cluster centres' ] ], Hbox: [ name:'box4', Text:[ title: 'Select where RMSD >', name:'rmsd', len:6, sensitive: 1, type : 'real' ], Label: [ text: 'A between Conformations', font: 'mediumBold' ] ], Radio:[ name:'method', title:'Method:', options:['Minimize Cluster','Maximize Cluster'], sensitive:1, bubbleHelp:['Minimize Cluster:Each entry belongs to the cluster\n' 'with the nearest cluster center.\n' 'Maximize Cluster:Make largest cluster.'] ], Radio:[ name:'superpose', title:'Template Fitting:', options:['Superpose','Absolute Positions'], sensitive:1, bubbleHelp:['Superpose the atoms that match the selected atoms.', 'Use the absolute positions that match the selected atoms.'] ] ]; if anytrue app isnull [method, rmsd] then local wkey = WindowCreate PANEL; if isnull method then method = 'Highest';endif WindowSetData [wkey,[mdb:db_Filename mdb]]; WindowSetAttr [wkey,[mseq:[text:fieldnames]]]; WindowSetAttr [wkey,[mfield:[text:molfieldnames]]]; WindowSetAttr [wkey,[efield:[text:fieldnames]]]; WindowSetData [wkey,[efield:efield]]; WindowSetData [wkey,[mseq:mseq]]; WindowSetData [wkey,[rmsd:1]]; WindowShow wkey; // sg Set up a child task to monitor the usecompf checkbox if second task_fork [master:'parent', statics:'share'] == 'child' then task_prio 0; task_idle 1; local baval; loop sleep 0.15; baval = WindowGetData [wkey, 'usecompf']; if baval.usecompf === 1 then WindowSetAttr [wkey,[efield:[sensitive:1]]]; WindowSetAttr [wkey,[maxormin:[sensitive:1]]]; else WindowSetAttr [wkey,[efield:[sensitive:0]]]; WindowSetAttr [wkey,[maxormin:[sensitive:0]]]; WindowSetData [wkey,[maxormin:'max']]; endif endloop endif loop local [value,trigger] = WindowWait wkey; if trigger === 'browse' then mdbfile = FilePrompt [ title:'Select a Database', filter:'*.mdb' ]; WindowSetData[ wkey, [mdb:mdbfile]]; mdb = db_Open mdbfile; dbv_Open mdbfile; [mseq, efield, fieldnames, molfieldnames] = check_fields [ mdb, mseq, efield ]; WindowSetAttr [wkey,[mseq:[text:fieldnames]]]; WindowSetAttr [wkey,[mfield:[text:molfieldnames]]]; WindowSetData [wkey,[mseq:mseq]]; elseif trigger === 'db_conf_cluster' then if value.db_conf_cluster === 'OK' then mdb = db_Open value.mdb; method = value.method; mseq = value.mseq; rmsd = value.rmsd; endif break; endif endloop else value.method = method; value.rmsd = rmsd; value.mfield = db_FirstFieldType [mdb, 'molecule']; endif local paneloff = [ 'db_conf_cluster', 'box1', 'box2', 'box3', 'box4', 'method', 'superpose']; apt WindowSetAttr [wkey,[tag [ paneloff,[[sensitive:0]] ] ]]; local entries = db_Entries mdb; local mseqs = db_ReadColumn [mdb, mseq]; entries = entries [x_sort mseqs]; local split_ents = split [entries, mtoc m_uniq sort mseqs]; for ents in split_ents loop // if there are > entry/molecule, perform clustering if length ents > 1 then ConfClustering [ mdbfile, value.mfield, ents, value.rmsd, value.method, value.usecompf, value.efield, value.maxormin ]; // if there is only 1 entry/molecule assign cluster, center anyway elseif length ents === 1 then // make sure the fields are created already apt db_EnsureField [mdb, ['Cluster', 'Center', 'RMSD'], ['int', 'int', 'float']]; db_Write [mdb, ents, tag ['Cluster', 1]]; // assign cluster #1 db_Write [mdb, ents, tag ['Center', 1]]; // assign center db_Write [mdb, ents, tag ['RMSD', 0]]; // RMSD = 0 endif endloop db_Close mdb; endfunction #else global function ConfClustering [mdbfile, molfield, rmsd, method] local RMSD_FOR_SAMECONF=0.0000001; // patch for the same conformer (sufficiently small valule) write['ConfClustering [\'{t:}\',\n \'{t:}\', {n:}, \'{t:}\']\n', db_Filename mdbfile, molfield, rmsd, method]; if iadd (first db_Fields mdbfile == 'Cluster') === 0 then db_CreateField [mdbfile,'Cluster','int']; endif if iadd (first db_Fields mdbfile == 'Center') === 0 then db_CreateField [mdbfile,'Center','int']; endif if iadd (first db_Fields mdbfile == 'RMSD') === 0 then db_CreateField [mdbfile,'RMSD','float']; endif local ent = db_Entries mdbfile; local n = length ent; local i, j,chain, atoms,pos,mols,ConfDistMatrix; atoms = Atoms[]; for i = 1,n loop chain = mol_Create cat db_ReadFields [mdbfile, ent(i), molfield]; atoms = cat cAtoms chain; oDestroy (atoms|(aAtomicNumber atoms == 1)); atoms = cat cAtoms chain; if length SelectedAtoms[] > 0 then atoms = SelectedAtoms[]; endif mols(i) = atoms; endloop // calculate RMSD matrix for all combination of conformers for i = 1, n loop for j = 1,n loop ConfDistMatrix(i)(j) = _calc_rmsd [aPos mols(i),mols(j)]; if i<>j and ConfDistMatrix(i)(j)===0 then ConfDistMatrix(i)(j)=RMSD_FOR_SAMECONF; endif // patch for the same conformer endloop endloop Close [force:1]; // find all pairs within RMSD threashold local f_RMSD_Matrix = ConfDistMatrix < rmsd; i = 1; local ClusterCenter,ClusterConfList,f_Entry,f_Cluster; // get cluster center and entry list in each cluster while length f_RMSD_Matrix loop f_Entry = app add f_RMSD_Matrix; // find an entry with the largest number of neighbors // and define it as a cluster center f_Entry = put [rep[0,length f_RMSD_Matrix],x_max f_Entry,1]; ClusterCenter(i) = ent|f_Entry; // get conformers in the cluster ClusterConfList(i) = ent|cat (f_RMSD_Matrix|f_Entry); // strip entries in the cluster above from original entry list // do same process for f_RMSD_Matrix f_Cluster = not cat (f_RMSD_Matrix|f_Entry); ent = ent | f_Cluster; f_RMSD_Matrix = f_RMSD_Matrix||[f_Cluster]; f_RMSD_Matrix = f_RMSD_Matrix|f_Cluster; i = i + 1; endloop // get RMSD from each Cluster Center ent = db_Entries mdbfile; ConfDistMatrix = ConfDistMatrix|indexof [ent, ClusterCenter]; ConfDistMatrix = perm [ConfDistMatrix,pack indexof [ent, ClusterCenter]]; ConfDistMatrix = apt mput [ConfDistMatrix,(ConfDistMatrix >= rmsd), 0]; //------------------------------------------------------------------------- // Remove double-counted entry and assign only one center for each entry // ConfDistMatrix = tr ConfDistMatrix; local dist_list,ent_idx; if method === 'Minimize Cluster' then for i = 1,n loop dist_list = ConfDistMatrix(i)|ConfDistMatrix(i)>0; if length dist_list > 0 then ent_idx = first ((indexof [dist_list,ConfDistMatrix(i)])|dist_list == minE dist_list); ConfDistMatrix(i) = rep [0,length ConfDistMatrix(i)]; ConfDistMatrix(i)(ent_idx) = minE dist_list; endif endloop else for i = 1,n loop dist_list = ConfDistMatrix(i)|ConfDistMatrix(i)>0; if length dist_list > 0 then ent_idx = indexof [first dist_list,ConfDistMatrix(i)]; ConfDistMatrix(i) = rep [0,length ConfDistMatrix(i)]; ConfDistMatrix(i)(ent_idx) = first dist_list; endif endloop endif ConfDistMatrix = tr ConfDistMatrix; for i = 1, length ClusterCenter loop ClusterConfList(i) = cat [ClusterCenter(i),ent|ConfDistMatrix(i)]; endloop ConfDistMatrix = add ConfDistMatrix; //------------------------------------------------------------------------- // output data for each entry local k; print '------------------------'; write ['{t:} {n:}\n' , ['RMSD = ', rmsd]]; for i = 1, length ClusterCenter loop for j = 1,length ClusterConfList(i) loop if iadd (ClusterCenter == ClusterConfList(i)(j)) then k = 1; else k = 0; endif db_Write [mdbfile, ClusterConfList(i)(j), tag[['Cluster','Center'],[i,k]]]; endloop write ['{t:} {n:}\n' , ['Cluster_',token swrite ['{c:}',i],' = ', length ClusterConfList(i)]]; endloop print '------------------------'; for i = 1,n loop if ConfDistMatrix(i)===RMSD_FOR_SAMECONF then ConfDistMatrix(i)=0.0; endif // patch for the same conformer db_Write [mdbfile, ent(i), tag['RMSD',ConfDistMatrix(i)]]; endloop return [ClusterCenter,ClusterConfList]; endfunction global function ConfClusterDisplay [CenterFlag,ColScalar,ClusterCenter,ClusterConfList,heavyatoms,display,color,center] local i; local atoms = Atoms[]; local chains = Chains[]; if display === 'ALL' then aSetHidden [atoms,0]; else i = atoi token drop [string display,8]; aSetHidden [cat cAtoms (chains|not(ColScalar == i)),1]; aSetHidden [cat cAtoms (chains|(ColScalar == i)),0]; endif if color === 'Cluster' then aSetScalar [cAtoms chains, ColScalar / maxE ColScalar]; aSetColorBy [atoms,'scalar']; else aSetColorBy [atoms,'element']; endif if center === 'Select' then aSetBondLook [atoms,'line']; if display === 'ALL' then aSetSelected [cat cAtoms (chains |CenterFlag),1]; else aSetSelected [atoms,0]; aSetSelected [cat cAtoms (chains |(CenterFlag and (ColScalar == i))),1]; endif aSetSelected [SelectedAtoms[]|(aAtomicNumber SelectedAtoms[] == 1),not heavyatoms]; elseif center === 'Stick' then aSetBondLook [cat cAtoms (chains |CenterFlag),'cylinder']; aSetSelected [atoms,0]; else aSetBondLook [atoms,'line']; aSetSelected [atoms,0]; aSetHidden [cat cAtoms (chains | not CenterFlag),1]; endif atoms = cat cAtoms uniq aChain (atoms|not aHidden atoms); aSetHidden [atoms|(aAtomicNumber atoms == 1),heavyatoms]; View[]; endfunction global function RMSD [] local chains = Chains[]; local a,b,sm,a_pos,i,rmsd; if length SelectedAtoms[] > 1 then sm = sm_Extract [SelectedAtoms[], aPrioSMI SelectedAtoms[]]; else sm = sm_Extract [cat cAtoms first chains, aPrioSMI cat cAtoms first chains]; endif a = uniq cat sm_MatchAtoms [sm, cat cAtoms first chains]; a_pos = aPos a; for i = 1, length chains loop b = uniq cat sm_MatchAtoms [sm, cat cAtoms chains(i)]; rmsd = _calc_rmsd [a_pos,b]; write ['RMSD ({n:}) = {n:}\n',i, rmsd]; endloop endfunction global function db_RMSD [mdb, selected_ent, ent] if mdb === [] then mdb = dbv_DefaultView []; endif if isnull ent then ent = db_Entries mdb; endif if istrue selected_ent then ent = dbv_SelectedEntries mdb; endif local molfield = first db_Fields mdb | second db_Fields mdb == 'molecule'; local a,b,sm,a_pos,i,rmsd; local c_query; local a_query; if length SelectedAtoms [] > 1 then //and length Chains [] === 1 then sm = sm_Extract [SelectedAtoms[], aPrioSMI SelectedAtoms[]]; elseif length Atoms[] > 1 then //and length Chains [] === 1 then sm = sm_Extract [Atoms [], aPrioSMI Atoms []]; else Close [force:1]; mol_Create cat db_ReadFields [mdb, first ent, molfield]; sm = sm_Extract [Atoms [], aPrioSMI Atoms []]; endif a = uniq cat sm_MatchAtoms [sm, Atoms []]; a_pos = aPos a; // Close [force:1]; write ['Query SMILES: "{}"\n', sm]; // if isnull ent then db_EnsureField [mdb, 'RMSD', 'float']; // endif local chain; for i = 1, length ent loop chain = mol_Create cat db_ReadFields [mdb, ent(i), molfield]; b = uniq cat sm_MatchAtoms [sm, cat cAtoms chain]; write ['Entry No.{}: ', i]; if istrue b then // if there are no matched atoms, skip this entry rmsd = _calc_rmsd [a_pos, b]; write ['RMSD = {.2f} A\n', rmsd]; db_Write [mdb, ent(i), tag['RMSD', rmsd]]; else write ['No matched atoms\n']; endif oDestroy chain; //Close [force:1]; endloop endfunction global function db_RMSD_1vsM [db1, mseq1, xtalfield, db2, mseq2, dockfield, rmsdfield] local no1 = db_ReadColumn [db1, mseq1]; local ent1 = db_Entries db1; local no2 = db_ReadColumn [db2, mseq2]; local ent2 = db_Entries db2; db_EnsureField [db2, rmsdfield, 'float']; local i; for i = 1, length ent1 loop local mol1 = cat db_ReadFields [db1, ent1(i), xtalfield]; local chain1 = mol_Create mol1; local mask = m_join [no2, no1(i)]; if orE mask then local ent2_sub = ent2 | mask; db_RMSD [db2, [], ent2_sub]; endif oDestroy chain1; endloop endfunction //(sn) global function sd_RMSD [src_sdf] local tmp_mdb = tok_cat [fbase src_sdf, '_superposeRMSD.mdb']; local molfield = 'mol'; db_Open [tmp_mdb, 'create']; db_ImportSD [tmp_mdb, src_sdf, molfield , [], [], [], [file_filed:0] ]; apt db_CreateField [tmp_mdb,['pair_mol','sRMSD','xRMSD'],['molecule','float','float']]; local ent = db_Entries tmp_mdb; local n = length ent; local i, chain, atoms; chain = mol_Create cat db_ReadFields [tmp_mdb, first ent, molfield]; atoms = cat cAtoms chain; oDestroy (atoms|(aAtomicNumber atoms == 1)); atoms = cat cAtoms chain; local a_pos = aPos atoms; oDestroy chain; write ['{}\n\n','sd_RMSD Results']; write ['{}\n','#conf, RMSD']; for i = 1, n loop chain = mol_Create cat db_ReadFields [tmp_mdb, ent(i), molfield]; atoms = cat cAtoms chain; oDestroy (atoms|(aAtomicNumber atoms == 1)); atoms = cat cAtoms chain; local b = atoms; local xrmsd = _calc_rmsd [a_pos, b]; local b_pos = aPos b; local srmsd0 = xrmsd; local cnt = 0; loop cnt = inc cnt; local [srmsd, R, t] = Superpose[ [a_pos, b_pos], [] ]; R = app add ( tr R(1) * R[2] ); t = t(2) - app add ( tr R * t[1] ); local b_pos0 = app add (R * [b_pos - t]); aSetPos [b,b_pos0]; srmsd = _calc_rmsd [a_pos, b]; b_pos = aPos b; if abs (srmsd0 - srmsd) < 0.0001 then break; else srmsd0 = srmsd; endif until cnt > 5 endloop b_pos = select [b_pos0, b_pos, srmsd0 < srmsd]; aSetPos [b,b_pos]; srmsd = sqrt((add cat sqr (a_pos - b_pos))/ length b); db_Write [tmp_mdb, ent(i), [pair_mol: mol_Extract b,sRMSD:srmsd,xRMSD:xrmsd]]; write ['{n:5},{n:8.4f}\n',i,srmsd]; oDestroy chain; endloop endfunction global function ClusterConf args local ClusterCenter,ClusterConfList,CenterFlag,ColScalar; local mdb,mdbfile,molfield; mdb=first args; if isnull args then mdb = dbv_ViewKeys[]; if MOE_BATCH then write['usage: ClusterConf[ mdbfile, molfield, rmsd threshold, method ]\n']; write[' mdbfile : \'filename.mdb\'\n']; write[' molfield: \'field name for cluster in mdbfile\'\n']; write[' rmsd threshold: threshold value for clustering (angstrom)\n']; write[' method : \'Minimize Cluster\' (default) or \'Maximize Cluster\'\n']; return[]; endif elseif MOE_BATCH then // for moebatch Close[force:1]; if isnull args(2) then args(2)=db_FirstFieldType[args(1),'molecule']; endif if isnull args(3) then args(3)=1.0; endif if isnull args(4) then args(4)='Minimize Cluster'; endif ConfClustering args; // [mdbfile, molfield, rmsd, method] Close[force:1]; return[]; endif if mdb === [] then mdb = ''; mdbfile = ''; molfield = ''; else mdbfile = app db_Filename mdb; molfield = (first db_Fields first mdbfile)|(second db_Fields first mdbfile == 'molecule'); endif local wkey = WindowCreate panel; WindowShow wkey; WindowSetAttr [wkey,[mdbfile:[text:mdbfile],molfield:[text:molfield]]]; WindowSetData [wkey,[rmsd:1]]; loop local [value,trigger] = WindowWait wkey; if trigger === 'mdbfile' then molfield = db_Fields value.mdbfile; molfield = molfield(1) | molfield(2) == 'molecule'; WindowSetAttr [wkey,[mdbfile:[text:mdbfile],molfield:[text:molfield]]]; elseif trigger === 'browse' then mdb = dbv_Open[]; mdbfile = cat[db_Filename mdb, mdbfile]; molfield = db_Fields mdb; molfield = molfield(1) | molfield(2) == 'molecule'; WindowSetAttr [wkey,[mdbfile:[text:mdbfile],molfield:[text:molfield]]]; WindowSetData [wkey,[mdbfile:mdbfile(1)]]; elseif trigger === 'confcluster' then if value.confcluster === 'Clustering' then Close [force:1]; [ClusterCenter,ClusterConfList] = ConfClustering [value.mdbfile, value.molfield, value.rmsd,value.method]; Close [force:1]; CenterFlag = db_ReadColumn [value.mdbfile,'Center']; ColScalar = db_ReadColumn [value.mdbfile,'Cluster']; app db_CreateMolecule db_ReadColumn [value.mdbfile,value.molfield]; local n = igen length ClusterCenter; local i; local ClusterNumList = []; for i = 1,length ClusterCenter loop ClusterNumList(i) = tok_cat ['Cluster_', token swrite ['{c:}',n(i)]]; endloop ClusterNumList = cat ['ALL',ClusterNumList]; WindowSetAttr [wkey,[ 'confcluster':[ text: ['Recalc.','Cancel'], onTrigger: ['return','exit']], 'mdbfile':[sensitive:0,titleFont:'medium'], 'browse':[sensitive:0], 'molfield':[sensitive:0,titleFont:'medium'], 'rmsd':[sensitive:0,titleFont:'medium'], 'method':[sensitive:0,titleFont:'medium'], 'display':[sensitive:1,titleFont:'mediumBold', options:ClusterNumList], 'heavyatoms':[sensitive:1,titleFont:'mediumBold'], 'color':[sensitive:1,titleFont:'mediumBold'], 'center':[sensitive:1,titleFont:'mediumBold'] ] ]; WindowSetData [wkey,['n_conf':iadd app length ClusterConfList]]; ConfClusterDisplay [CenterFlag,ColScalar,ClusterCenter, ClusterConfList,value.heavyatoms,'ALL', 'Cluster','Only']; elseif value.confcluster === 'Recalc.' then Close [force:1]; WindowSetData [wkey,[display:'ALL',color:'Cluster',center:'Only','n_conf':iadd app length ClusterConfList]]; WindowSetAttr [wkey,[ 'confcluster':[ text: ['Clustering','Cancel'], onTrigger: ['return','exit']], 'mdbfile':[sensitive:1,titleFont:'mediumBold'], 'browse':[sensitive:1], 'molfield':[sensitive:1,titleFont:'mediumBold'], 'rmsd':[sensitive:1,titleFont:'mediumBold'], 'method':[sensitive:1,titleFont:'mediumBold'], 'heavyatoms':[sensitive:0,titleFont:'medium'], 'display':[sensitive:0,titleFont:'medium'], 'color':[sensitive:0,titleFont:'medium'], 'center':[sensitive:0,titleFont:'medium'] ] ]; endif else if value.display === 'ALL' then WindowSetData [wkey,['n_conf':iadd app length ClusterConfList]]; else WindowSetData [wkey,['n_conf':length ClusterConfList(atoi token drop [string value.display,8])]]; endif; ConfClusterDisplay [CenterFlag,ColScalar,ClusterCenter, ClusterConfList,value.heavyatoms,value.display, value.color,value.center]; if value.confcluster === 'Clustering' then Close[force:1]; endif; endif endloop endfunction #endif