Import the model and make a prediction with real data
%load_ext autoreload
%autoreload 2
import re
import os
import sys
import shutil
from shutil import copyfile, copy2
from shutil import move
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.pylab as pylab
import seaborn as sns
from scipy import stats
# Cause plots to be displayed in the notebook:
%matplotlib inline
import subprocess
from matplotlib import cm
from latt2D_modules import calc_diffuse, calc_diffuse_cfs3, calc_diffuse_cfs4, calc_diffuse_cfs4_big
from latt2D_modules import get_occ_map, get_2D_occ_map_from_seq,store_occ_map_as_seq
from latt2D_modules import plot_occ_map,read_bin,output_16bit_pgm
import time
from tensorflow import keras
from tensorflow.keras import layers
2023-05-15 12:33:40.176392: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE4.1 SSE4.2 AVX AVX2 FMA To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from keras.metrics import MeanSquaredError, MeanAbsoluteError, MeanAbsolutePercentageError, RootMeanSquaredError
from sklearn.metrics import r2_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
import h5py
from CFS_CNN_models import reconstruct_small_cfs_model
from CFS_CNN_models import reconstruct_medium_cfs_model
from CFS_CNN_models import reconstruct_large_cfs_model
from aux_functions import predict_and_regen_plot
import imageio.v2 as imageio
from scipy import stats
'''
helper function for univariate stats
'''
def get_univariate_analysis(x):
# Compute the mean, median, and standard deviation of the sample
mean = np.mean(x)
median = np.median(x)
variance = np.var(x)
std_dev = np.std(x)
# Test for normality using the Shapiro-Wilk test
stat, p_val = stats.shapiro(x)
if p_val < 0.05:
normality = "not normally distributed"
else:
normality = "normally distributed"
# Test for skewness using the skewness test
skewness, p_val = stats.skewtest(x)
if p_val < 0.05:
skewness_significance = "significant"
else:
skewness_significance = "not significant"
# Test for kurtosis using the kurtosis test
kurtosis, p_val = stats.kurtosistest(x)
if p_val < 0.05:
kurtosis_significance = "significant"
else:
kurtosis_significance = "not significant"
# Create a pandas dataframe with the computed statistics
df = pd.DataFrame({'mean': [mean], 'median': [median], 'variance': [variance],
'std_dev': [std_dev], 'normality': [normality],
'skewness': [skewness], 'skewness_significance': [skewness_significance],
'kurtosis': [kurtosis], 'kurtosis_significance': [kurtosis_significance]})
return df
def process_image_and_plot(img,threshold = 1.66, gamma = 20.0, maskoutbragg=False, ut=-2.218, mstd=1.5):
'''
stages of image processing
1. normalized so that mean intensity = 1.0
2. pixel intensities above a specified threshold are compressed (gamma controls compression curve)
3. scaling transformation so that log(I) ~ N has a specific u and variance
default is ut=-2.218, mstd=1.5 where mstd is a multiplier on the std )
'''
palette = sns.color_palette("Set2", n_colors=4)
img=img/np.mean(img.flatten())
vmax=np.mean(img.flatten())+1.0*np.std(img.flatten())
fig, axes = plt.subplots(2, 3, figsize=(10,6))
axes[0,0].imshow(img, cmap='gray',vmax=vmax)
axes[0,0].set_title('vmax= %.3f [mean(I)+std(I)]'%vmax,size=9)
axes[0,0].axis('off')
sns.histplot(ax=axes[0,1], data=img.flatten(), bins=64 , color=palette[0] , kde=True, stat='density',label='norm(I)')
sns.histplot(ax=axes[0,2], data=np.log(img.flatten()), bins=64 , color=palette[1] , kde=True, stat='density',label='log(I)')
axes[0,1].set_ylabel('')
axes[0,2].set_ylabel('')
axes[0,1].legend()
axes[0,2].legend()
axes[0,1].set_title('Original',size=9)
axes[0,2].set_title('Log(Original)',size=9)
img_sc=smooth_compress(img, threshold, gamma, maskoutbragg)
img_fix=transform_log_obs(img_sc.flatten().reshape((64,-1)), ut, mstd)
vmax=np.max(img_fix.flatten())-2.0*np.std(img_fix.flatten())
axes[1,0].imshow(img_fix, cmap='gray',vmax=vmax)
axes[1,0].set_title('vmax= %.3f [max(I)-2.0*std(I)]'%vmax,size=9)
axes[1,0].axis('off')
sns.histplot(ax=axes[1,1], data=img_fix.flatten(), bins=64 , color=palette[2] , kde=True, stat='density',label='norm(I)')
sns.histplot(ax=axes[1,2], data=np.log(img_fix.flatten()), bins=64 , color=palette[3] , kde=True, stat='density',label='log(I)')
axes[1,1].set_ylabel('')
axes[1,2].set_ylabel('')
axes[1,1].legend()
axes[1,2].legend()
axes[1,1].set_title('Filtered',size=9)
axes[1,2].set_title('Log(Filtered)',size=9)
df_res=get_univariate_analysis(img.flatten())
df_res=pd.concat([df_res,get_univariate_analysis(np.log(img.flatten()))],axis=0)
df_res=pd.concat([df_res,get_univariate_analysis(img_fix.flatten())],axis=0)
df_res=pd.concat([df_res,get_univariate_analysis(np.log(img_fix.flatten()))],axis=0)
df_res.insert(0, "processed", ["original","log_original","filtered","log_filtered"] )
df_res.reset_index(drop=True,inplace=True)
return df_res,img_fix
# A compression function
# Define a transformation function that smoothly compresses values above the threshold
def smooth_compress(img, threshold = 50, gamma = 1.0, maskoutbragg=False):
x=img.flatten()
y = np.zeros_like(x)
y[x <= threshold] = x[x <= threshold]
if maskoutbragg:
y[x > threshold] = (threshold/gamma) + np.log(1 + (x[x > threshold] - threshold) / threshold)
else:
y[x > threshold] = threshold + np.log(1 + (x[x > threshold] - threshold) / (gamma*threshold))
return y.reshape(img.shape)
# Compress the values using the smooth compression function
# compressed_img = smooth_compress(img)
def transform_log_obs(x,ut=-2.218,mstd=1.5):
'''
read in x: data that is log normally distributed
ut: target mean for the
mstd: multiplier on the desired change in std
'''
# define a scaling transform based on the stats from the theoretical data
log_X=np.log(x)
mean_log_X=np.mean(log_X)
var_log_X=np.var(log_X)
std_log_X=np.std(log_X)
target_mean= ut # this is the target u for the log(X)
target_var= (mstd*std_log_X)**2.0 # target std for log(X)
# normalize the log data
zn=(log_X-mean_log_X)/var_log_X
# transform the normed data
zp= zn*target_var + target_mean
return np.exp(zp)
cfs2_model_sm=reconstruct_small_cfs_model(3,'best_model_5000.h5')
cfs3_model_sm=reconstruct_small_cfs_model(8,'cfs3_best_model_5000.h5')
cfs4_model_sm=reconstruct_small_cfs_model(15,'cfs4_best_model_5000.h5')
cfs4_model_smX13=reconstruct_small_cfs_model(15,'cfs4_sm_X13_incremental.h5')
2023-05-15 12:33:53.888455: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE4.1 SSE4.2 AVX AVX2 FMA To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. 2023-05-15 12:33:53.890736: I tensorflow/core/common_runtime/process_util.cc:146] Creating new thread pool with default inter op setting: 2. Tune using inter_op_parallelism_threads for best performance.
cfs4_model_mdX13=reconstruct_medium_cfs_model(15,'cfs4_md_X13.h5')
cfs4_model_lgX13=reconstruct_large_cfs_model(15,'cfs4_lg_X13.h5')
def get_metrics_compare_predict_with_regen(test_img,corr_in,exp=1):
corr_in=np.r_[1.0,corr_in]
corr_out=np.loadtxt('./expfiles_%d/corr.out'%exp)
imdat_0 = test_img.copy()
imdat_1 = read_bin('./expfiles_%d/hk0.bin'%exp, npixels=64, offset=1280)
# print(corr_in, corr_out.flatten())
# print(np.shape(imdat_0))
# print(np.shape(imdat_1))
r2c = r2_score(corr_in.flatten(), corr_out.flatten())
msec=mean_squared_error(corr_in,corr_out.flatten())
maec=mean_absolute_error(corr_in,corr_out.flatten())
r2i=r2_score(imdat_0.flatten(),imdat_1.flatten())
msei=mean_squared_error(imdat_0.flatten(),imdat_1.flatten())
maei=mean_absolute_error(imdat_0.flatten(),imdat_1.flatten())
return [r2c,msec,maec,r2i,msei,maei]
# Create a figure and axis for the plot
# Define a function to update the plot
def predict_and_regen_plot_cfs2(test_img):
iconc=0.50 # spin concentration
cread=1 # reading correlation function from input file otherwise it will be generated randomly
icycles=200 # MC cycles
ianneal=200 # MC input
rows,cols =1,4
fig, axes = plt.subplots(rows, cols, figsize=(10,6))
axes[0].imshow(test_img.reshape((64,-1)),cmap='gray')
axes[0].axis("off")
corrin=cfs2_model.predict(test_img)[0]
fhout=open('corr.in','w')
fhout.write("%.6f %.6f\n%.6f %.6f\n"%(1.0,corrin[0],corrin[1],corrin[2]))
fhout.close()
calc_diffuse(iconc,1,icycles,ianneal,1) # do the regen on the image
occ3D=get_occ_map('./expfiles_1/ising2D_occ.txt')
axes[1].imshow(np.transpose(occ3D[:,:,0]),interpolation='nearest',cmap='gray')
axes[1].axis("off")
imdat = read_bin('./expfiles_1/hk0.bin', npixels=64, offset=1280)
axes[2].imshow(np.flip(imdat,0),cmap='gray')
axes[2].axis("off")
corrf2=np.r_[1.0,corrin].reshape((2,-1))
sns.heatmap(corrf2, ax=axes[3], cmap='bwr', annot=True, square=True, vmin=-1,vmax=1.0, cbar=0) # cbar_kws={'shrink':0.55})
axes[3].axis('off')
my_metrics=get_metrics_compare_predict_with_regen(test_img.reshape((64,-1)),corrin,exp=1)
fig.suptitle("predicted 01: %.3f 10: %.3f 11: %.3f \n"%(tuple(corrin)) +"metrics(r2, mse, mae)\n corrfunc: %.3f, %.3f, %.3f \n FT_metrics: %.3f, %.3f, %.3f " %(tuple(my_metrics)), fontsize=12,y=0.80)
# Create a figure and axis for the plot
# Define a function to update the plot
def predict_and_regen_plot_cfs3(test_img):
iconc=0.50 # spin concentration
cread=1 # reading correlation function from input file otherwise it will be generated randomly
icycles=200 # MC cycles
ianneal=200 # MC input
rows,cols =1,4
fig, axes = plt.subplots(rows, cols, figsize=(10,6))
axes[0].imshow(test_img.reshape((64,-1)),cmap='gray')
axes[0].axis("off")
corrin=cfs3_model.predict(test_img)[0]
fhout=open('corr.in','w')
fhout.write("1.000000 %.6f %.6f\n%.6f %.6f %.6f\n%.6f %.6f %.6f\n"%(tuple(corrin.flatten())))
fhout.close()
calc_diffuse_cfs3(iconc,1,icycles,ianneal,1) # do the regen on the image
corrf3=np.r_[1.0,corrin].reshape((3,-1))
sns.heatmap(corrf3, ax=axes[3], cmap='bwr', square=True, annot=True, vmin=-1,vmax=1.0, cbar=0)
axes[3].axis('off')
occ3D=get_occ_map('./expfiles_1/ising2D_occ.txt')
axes[1].imshow(np.transpose(occ3D[:,:,0]),interpolation='nearest',cmap='gray')
axes[1].axis("off")
imdat = read_bin('./expfiles_1/hk0.bin', npixels=64, offset=1280)
axes[2].imshow(np.flip(imdat,0),cmap='gray')
axes[2].axis("off")
my_metrics=get_metrics_compare_predict_with_regen(test_img.reshape((64,-1)),corrin,exp=1)
fig.suptitle("metrics(r2, mse, mae)\n corrfunc: %.3f, %.3f, %.3f \n FT_metrics: %.3f, %.3f, %.3f " %(tuple(my_metrics)), fontsize=12,y=0.80)
# Create a figure and axis for the plot
# Define a function to update the plot
def predict_and_regen_plot_cfs4(test_img):
iconc=0.50 # spin concentration
cread=1 # reading correlation function from input file otherwise it will be generated randomly
icycles=300 # MC cycles
ianneal=300 # MC input
rows,cols =1,4
fig, axes = plt.subplots(rows, cols, figsize=(14,6))
axes[0].imshow(test_img.reshape((64,-1)),cmap='gray')
axes[0].axis("off")
corrin=cfs4_model.predict(test_img)[0]
fhout=open('corr.in','w')
fhout.write("1.000000 %.6f %.6f %.6f\n%.6f %.6f %.6f %.6f\n%.6f %.6f %.6f %.6f\n%.6f %.6f %.6f %.6f\n"%(tuple(corrin.flatten())))
fhout.close()
calc_diffuse_cfs4(iconc,1,icycles,ianneal,1) # do the regen on the image
corrf4=np.r_[1.0,corrin].reshape((4,-1))
sns.heatmap(corrf4, ax=axes[3], cmap='bwr', annot=True, square=True, vmin=-1,vmax=1.0,fmt=".3f" ,cbar=0 )
axes[3].axis('off')
occ3D=get_occ_map('./expfiles_1/ising2D_occ.txt')
axes[1].imshow(np.transpose(occ3D[:,:,0]),interpolation='nearest',cmap='gray')
axes[1].axis("off")
imdat = read_bin('./expfiles_1/hk0.bin', npixels=64, offset=1280)
axes[2].imshow(np.flip(imdat,0),cmap='gray')
axes[2].axis("off")
my_metrics=get_metrics_compare_predict_with_regen(test_img.reshape((64,-1)),corrin,exp=1)
fig.suptitle("metrics(r2, mse, mae)\n corrfunc: %.3f, %.3f, %.3f \n FT_metrics: %.3f, %.3f, %.3f " %(tuple(my_metrics)), fontsize=14,y=0.88)
obs_path='./obs_data_prep_for_testing/'
# grabbing the raw image
img_dcdnb_hk0_box1 = imageio.imread(obs_path+'dcdnb_hk0_box1_64x64.tif').astype(float)
df_res,img_fix=process_image_and_plot(img_dcdnb_hk0_box1,threshold = 1.66, gamma = 20.0, maskoutbragg=False, ut=-2.218, mstd=1.5)
df_res
processed | mean | median | variance | std_dev | normality | skewness | skewness_significance | kurtosis | kurtosis_significance | |
---|---|---|---|---|---|---|---|---|---|---|
0 | original | 1.000000 | 0.864191 | 0.911648 | 0.954803 | not normally distributed | 76.083846 | significant | 43.661071 | significant |
1 | log_original | -0.080273 | -0.145962 | 0.081160 | 0.284887 | not normally distributed | 55.175337 | significant | 36.095530 | significant |
2 | filtered | 0.121149 | 0.097498 | 0.005552 | 0.074514 | not normally distributed | 45.824290 | significant | 29.804490 | significant |
3 | log_filtered | -2.218000 | -2.327925 | 0.170234 | 0.412594 | not normally distributed | 30.433480 | significant | 16.780362 | significant |
# shape image for prediction
test_img=np.expand_dims(img_fix, axis=(0,-1))
np.shape(test_img)
(1, 64, 64, 1)
# myimg=test_img.reshape((64,-1))
# plt.imshow(myimg,cmap='gray')
corrin=cfs2_model_sm.predict(test_img)[0]
predict_and_regen_plot(calc_diffuse,test_img,corrin,cfs=2,iconc=0.5,icycles=200,ianneal=200)
1/1 [==============================] - 0s 35ms/step
corrin=cfs3_model_sm.predict(test_img)[0]
predict_and_regen_plot(calc_diffuse_cfs3,test_img,corrin,cfs=3,iconc=0.5,icycles=200,ianneal=200)
1/1 [==============================] - 0s 24ms/step
corrin=cfs4_model_sm.predict(test_img)[0]
predict_and_regen_plot(calc_diffuse_cfs4,test_img,corrin,cfs=4,iconc=0.5,icycles=300,ianneal=300)
1/1 [==============================] - 0s 27ms/step
corrin=cfs4_model_smX13.predict(test_img)[0]
predict_and_regen_plot(calc_diffuse_cfs4_big,test_img,corrin,cfs=4,iconc=0.5,icycles=500,ianneal=500)
1/1 [==============================] - 0s 34ms/step
corrin=cfs4_model_mdX13.predict(test_img)[0]
predict_and_regen_plot(calc_diffuse_cfs4_big,test_img,corrin,cfs=4,iconc=0.5,icycles=500,ianneal=500)
1/1 [==============================] - 0s 27ms/step
corrin=cfs4_model_lgX13.predict(test_img)[0]
predict_and_regen_plot(calc_diffuse_cfs4_big,test_img,corrin,cfs=4,iconc=0.5,icycles=500,ianneal=500)
1/1 [==============================] - 0s 39ms/step
# grabbing the raw image
img_hk0_box2 = imageio.imread(obs_path+'hk0_box2_64x64.tif').astype(float)
df_res,img_fix=process_image_and_plot(img_hk0_box2,threshold = 1.0, gamma = 20.0, maskoutbragg=False, ut=-0.218, mstd=1.1)
df_res
processed | mean | median | variance | std_dev | normality | skewness | skewness_significance | kurtosis | kurtosis_significance | |
---|---|---|---|---|---|---|---|---|---|---|
0 | original | 1.000000 | 0.801308 | 2.605537 | 1.614168 | not normally distributed | 74.716006 | significant | 43.213754 | significant |
1 | log_original | -0.171920 | -0.221510 | 0.154921 | 0.393600 | not normally distributed | 51.508209 | significant | 33.916966 | significant |
2 | filtered | 0.824363 | 0.812881 | 0.037180 | 0.192822 | not normally distributed | 29.201890 | significant | 24.131795 | significant |
3 | log_filtered | -0.218000 | -0.207170 | 0.048290 | 0.219750 | not normally distributed | 9.524045 | significant | 3.488362 | significant |
# shape image for prediction
test_img=np.expand_dims(img_fix, axis=(0,-1))
np.shape(test_img)
(1, 64, 64, 1)
# myimg=test_img.reshape((64,-1))
# plt.imshow(myimg,cmap='gray')
corrin=cfs2_model_sm.predict(test_img)[0]
predict_and_regen_plot(calc_diffuse,test_img,corrin,cfs=2,iconc=0.5,icycles=200,ianneal=200)
1/1 [==============================] - 0s 25ms/step
# myimg=test_img.reshape((64,-1))
# plt.imshow(myimg,cmap='gray')
corrin=cfs3_model_sm.predict(test_img)[0]
predict_and_regen_plot(calc_diffuse_cfs3,test_img,corrin,cfs=3,iconc=0.5,icycles=200,ianneal=200)
1/1 [==============================] - 0s 24ms/step
# myimg=test_img.reshape((64,-1))
# plt.imshow(myimg,cmap='gray')
corrin=cfs4_model_sm.predict(test_img)[0]
predict_and_regen_plot(calc_diffuse_cfs4,test_img,corrin,cfs=4,iconc=0.5,icycles=300,ianneal=300)
1/1 [==============================] - 0s 18ms/step
# myimg=test_img.reshape((64,-1))
# plt.imshow(myimg,cmap='gray')
corrin=cfs4_model_smX13.predict(test_img)[0]
predict_and_regen_plot(calc_diffuse_cfs4_big,test_img,corrin,cfs=4,iconc=0.5,icycles=300,ianneal=300)
1/1 [==============================] - 0s 25ms/step
# myimg=test_img.reshape((64,-1))
# plt.imshow(myimg,cmap='gray')
corrin=cfs4_model_mdX13.predict(test_img)[0]
predict_and_regen_plot(calc_diffuse_cfs4_big,test_img,corrin,cfs=4,iconc=0.5,icycles=300,ianneal=300)
1/1 [==============================] - 0s 26ms/step
# myimg=test_img.reshape((64,-1))
# plt.imshow(myimg,cmap='gray')
corrin=cfs4_model_lgX13.predict(test_img)[0]
predict_and_regen_plot(calc_diffuse_cfs4_big,test_img,corrin,cfs=4,iconc=0.5,icycles=300,ianneal=300)
1/1 [==============================] - 0s 32ms/step
# read in correlation datum for CFS2 griding
df_cfs2=pd.read_csv('output_correlations_1000.csv')
df_cfs2=df_cfs2.drop('Unnamed: 0',axis=1)
df_cfs2.reset_index(inplace=True,drop=True)
xvar=np.random.randint(0,len(df_cfs2),1)[0]
bin_dir='image_inputs_bin'
test_img = read_bin('%s/hk0_%s.bin'%(bin_dir,str(xvar).zfill(6)), npixels=64, offset=1280)
print(df_cfs2.iloc[xvar])
00 0.999836 01 -0.440164 10 -0.243364 11 -0.308964 Name: 32, dtype: float64
plt.imshow(np.flip(test_img,0),cmap='gray')
<matplotlib.image.AxesImage at 0x7f8cb061d060>
test_img=np.expand_dims(test_img, axis=(0,-1))
np.shape(test_img)
(1, 64, 64, 1)
corrin=cfs2_model_sm.predict(test_img)[0]
predict_and_regen_plot(calc_diffuse,test_img,corrin,cfs=2,iconc=0.5,icycles=200,ianneal=200)
1/1 [==============================] - 0s 29ms/step
corrin=cfs4_model_smX13.predict(test_img)[0]
predict_and_regen_plot(calc_diffuse_cfs4_big,test_img,corrin,cfs=4,iconc=0.5,icycles=300,ianneal=300)
1/1 [==============================] - 0s 20ms/step
# read in correlation datum for CFS4 griding
df_cfs4=pd.read_csv('cfs4_big_corr_out_1.csv')
df_cfs4=df_cfs4.drop('Unnamed: 0',axis=1)
df_cfs4.reset_index(inplace=True,drop=True)
xvar=np.random.randint(0,len(df_cfs4),1)[0]
bin_dir='cfs4_big_bin_1/'
test_img = read_bin('%s/hk0_%s.bin'%(bin_dir,str(xvar).zfill(6)), npixels=64, offset=1280)
print(df_cfs4.iloc[xvar])
00 0.999763 01 0.409051 02 0.043165 03 0.010613 10 -0.146939 11 -0.153883 12 0.015822 13 -0.197720 20 -0.472026 21 -0.340949 22 -0.081835 23 -0.268901 30 0.411655 31 0.292297 32 0.097853 33 0.300544 Name: 712, dtype: float64
test_img=np.expand_dims(test_img, axis=(0,-1))
np.shape(test_img)
(1, 64, 64, 1)
corrin=cfs2_model_sm.predict(test_img)[0]
predict_and_regen_plot(calc_diffuse,test_img,corrin,cfs=2,iconc=0.5,icycles=200,ianneal=200)
1/1 [==============================] - 0s 76ms/step
corrin=cfs3_model_sm.predict(test_img)[0]
predict_and_regen_plot(calc_diffuse_cfs3,test_img,corrin,cfs=3,iconc=0.5,icycles=200,ianneal=200)
1/1 [==============================] - 0s 66ms/step
corrin=cfs4_model_sm.predict(test_img)[0]
predict_and_regen_plot(calc_diffuse_cfs4,test_img,corrin,cfs=4,iconc=0.5,icycles=200,ianneal=200)
1/1 [==============================] - 0s 58ms/step
corrin=cfs4_model_smX13.predict(test_img)[0]
predict_and_regen_plot(calc_diffuse_cfs4_big,test_img,corrin,cfs=4,iconc=0.5,icycles=300,ianneal=300)
1/1 [==============================] - 0s 67ms/step