# Markus Meier, March 2017, University of Manitoba
# Encoding: Unicode UTF-8
#
# Example script showing how to extract data from the JSON file created by
# x3dna-dssr.
# This script takes advantage of the new "nt_resnum" field introduced in
# x3dna-dssr-v1.6.6-2017feb20 and requires this version or higher.
#
# This example uses PDBID 2M4P (www.rcsb.org), which is an NMR ensemble with
# 10 models. To generate the JSON file, execute
#   x3dna-dssr -i=2m4p.pdb -o=2m4p.json --more --nmr --json
#
# Comments
# You can use the following code to record a chosen level of hierarchy into a
# file for inspection:
# sink("R_json_struct.txt")
# print(attr(myData, "JSON"))
# sink()

# Setup R environment ----
# The workspace is intentionally NOT cleared with rm(list = ls()): that call
# deletes whatever objects the user already has in the session, and every
# object this script needs is (re)assigned below anyway.
graphics.off()
library(dplyr)
library(tidyjson)

# Read JSON file generated by x3dna-dssr ----
json_file <- "2m4p.json"
if (!file.exists(json_file)) {
  stop(
    "Cannot find '", json_file, "' in the working directory. ",
    "Generate it with x3dna-dssr (see the header of this script).",
    call. = FALSE
  )
}
myData <- read_json(json_file, format = "json")

# sink("R_json_struct.txt")
# print(attr(myData, "JSON"))
# sink()

# Navigate through the data structure ----
# x3dna-dssr generates a different depth of JSON data structure, depending on
# whether the --nmr option is used or not.

# For JSON generated with the --nmr option, use this code.
# The first gather_array() (one row per entry of "models") gets an explicit
# index column name. Without it both gather_array() calls would use tidyjson's
# default index column name, and the model index could not be told apart from
# the nucleotide index afterwards.
# NOTE(review): the second call keeps the default name so that "array.index"
# still refers to the position within "nts" -- confirm against the installed
# tidyjson version.
nts_array <- myData %>%
  enter_object("models") %>%
  gather_array(column.name = "model") %>%
  enter_object("parameters") %>%
  enter_object("nts") %>%
  gather_array()

# For JSON generated without the --nmr option, use this code instead:
# nts_array <- myData %>%
#   enter_object("nts") %>%
#   gather_array()

# sink("R_json_struct_nts_array.txt")
# print(attr(nts_array, "JSON"))
# sink()

# Now we have collapsed the JSON data enough that we can extract what we want
# to the dplyr tbl:
# (chain ID, nucleotide number, nucleotide name, backbone torsion angles,
# sugar conformation ...)
# Copy the per-nucleotide JSON fields of interest into ordinary columns.
# Identifiers and conformation labels are read with jstring(), the torsion
# angles with jnumber(). Columns are created in the order listed here.
tidyData <- spread_values(
  nts_array,
  chain          = jstring("chain_name"),
  id             = jstring("nt_id"),
  resi           = jstring("nt_resnum"),
  resn           = jstring("nt_name"),
  alpha          = jnumber("alpha"),
  beta           = jnumber("beta"),
  gamma          = jnumber("gamma"),
  delta          = jnumber("delta"),
  epsilon        = jnumber("epsilon"),
  zeta           = jnumber("zeta"),
  chi            = jnumber("chi"),
  baseSugar_conf = jstring("baseSugar_conf"),
  sugar_class    = jstring("sugar_class")
)

# Select the nucleotides we are interested in with dplyr's filter().
# All residues named "DG":
G4_all <- filter(tidyData, resn == "DG")

# "resi" was extracted with jstring(), so the comparisons against 3 and 5
# below rely on R coercing the number to a string.
# DG residues numbered 3 or 5 (presumably the ones flanking the bulge --
# verify against the structure):
G4_flankBulge <- filter(G4_all, resi == 3 | resi == 5)

# Every other DG residue (the complement of the selection above):
G4_others <- filter(G4_all, resi != 3 & resi != 5)

# Show what's inside the tbl
print(G4_flankBulge)