import h5py

# NOTE(review): later statements in this file use `np`, `plt` and `sns`, but
# no import for them is visible here -- confirm they are imported elsewhere.

# Read the image data while the HDF5 file is open (read-only mode); the
# dataset handle is only usable inside the `with` block.
with h5py.File('image_dataset_5000.h5', 'r') as f:
    # Names of the top-level entries stored in the file.
    dataset_names = list(f.keys())

    # Show every entry name so the available datasets can be inspected.
    for name in dataset_names:
        print(name)

    # Pull the whole 'img_data' dataset into memory before the file closes.
    dset = f['img_data']
    X = dset[:]


def bin_centers(bin_edges):
    """Return the midpoint of every pair of consecutive bin edges.

    Parameters
    ----------
    bin_edges : array-like supporting slicing and element-wise arithmetic
        Bin edges, e.g. the second return value of ``np.histogram``.

    Returns
    -------
    Midpoints ``(bin_edges[i + 1] + bin_edges[i]) / 2``; one element fewer
    than ``bin_edges``.
    """
    lower = bin_edges[:-1]
    upper = bin_edges[1:]
    return (upper + lower) / 2.0
# Flatten once and reuse the result. The original called X.flatten() five
# times; for a NumPy array each call allocates a fresh copy of all the data.
pixels = X.flatten()

# Density-normalised histogram of all values, with a logarithmic count axis.
ax = plt.hist(pixels, bins=1000, log=True, density=True)

# Basic summary statistics of the raw values.
print(np.mean(pixels))
print(np.median(pixels))
print(np.max(pixels))
print(np.min(pixels))
from scipy import stats

# Log-transformed values, computed once and shared by the histogram and the
# univariate analysis below.
# NOTE(review): np.log gives -inf / NaN for non-positive inputs, which would
# break the histogram range -- confirm X is strictly positive.
log_pixels = np.log(X.flatten())
ax = plt.hist(log_pixels, bins=1000, density=True)


# Example input for a quick sanity check of the function below:
# x = np.random.normal(loc=0, scale=1, size=100)
def get_univariate_analysis(x):
    """Print summary statistics and normality diagnostics for a 1-D sample.

    Three hypothesis tests are run (Shapiro-Wilk, ``skewtest`` and
    ``kurtosistest``); for each one a verdict is printed using a 0.05
    significance threshold. The mean, median, variance, standard deviation,
    sample skewness and sample excess kurtosis are printed afterwards.

    The statistic returned by ``stats.skewtest`` / ``stats.kurtosistest`` is
    a z-score for the test, not the skewness or kurtosis of the data, so the
    reported "Skewness" and "Kurtosis" values are computed with
    ``stats.skew`` and ``stats.kurtosis`` (Fisher definition: a normal
    distribution has kurtosis 0).

    Parameters
    ----------
    x : 1-D array-like of numbers
        The sample to analyse.

    Returns
    -------
    None. All results are written to standard output.

    Notes
    -----
    SciPy documents that for samples larger than 5000 the Shapiro-Wilk
    p-value may not be accurate; treat that verdict with care on large data.
    """
    # Location and spread of the sample.
    mean = np.mean(x)
    median = np.median(x)
    variance = np.var(x)
    std_dev = np.std(x)

    # Test for normality using the Shapiro-Wilk test.
    _, p_val = stats.shapiro(x)
    if p_val < 0.05:
        print("The data is not normally distributed (p = {:.3f}).".format(p_val))
    else:
        print("The data is normally distributed (p = {:.3f}).".format(p_val))

    # Test whether the skewness differs from that of a normal distribution.
    _, p_val = stats.skewtest(x)
    if p_val < 0.05:
        print("The data is significantly skewed (p = {:.3f}).".format(p_val))
    else:
        print("The data is not significantly skewed (p = {:.3f}).".format(p_val))

    # Test whether the kurtosis differs from that of a normal distribution.
    _, p_val = stats.kurtosistest(x)
    if p_val < 0.05:
        print("The data is significantly kurtotic (p = {:.3f}).".format(p_val))
    else:
        print("The data is not significantly kurtotic (p = {:.3f}).".format(p_val))

    # Actual shape statistics of the sample (not the test z-scores).
    skewness = stats.skew(x)
    kurtosis = stats.kurtosis(x)

    # Print the computed statistics.
    print("Mean: {:.3f}".format(mean))
    print("Median: {:.3f}".format(median))
    print("Variance: {:.3f}".format(variance))
    print("Standard Deviation: {:.3f}".format(std_dev))
    print("Skewness: {:.3f}".format(skewness))
    print("Kurtosis: {:.3f}".format(kurtosis))


get_univariate_analysis(log_pixels)
# Density-normalised histogram of all values in X.
myhist, bin_edges = np.histogram(X.flatten(), bins=1000, density=True)

# Empty bins have density 0, so log(0) = -inf is expected here; silence the
# divide-by-zero warning and drop those bins right below.
with np.errstate(divide='ignore'):
    test = np.log(myhist)

# One mask, applied to both the log-densities and the matching bin centres
# so the two arrays stay aligned.
finite_bins = ~np.isinf(test)
newloghist = test[finite_bins]
newbcent = bin_centers(bin_edges)[finite_bins]
len(newbcent)  # only displays a value in an interactive session

# Straight-line fit of log-density against bin centre, skipping the first 15
# and the last 250 retained bins.
# NOTE(review): the 15 / 250 cut-offs look hand-picked -- confirm.
m, b = np.polyfit(newbcent[15:-250], newloghist[15:-250], 1)
xs = np.linspace(0, 20, 20)
ys = m * xs + b

# Log-density points with the fitted line drawn on top.
ax = sns.scatterplot(x=newbcent, y=newloghist, marker='o', edgecolor='r',
                     c='none', s=10, alpha=0.7, label='logplot hist')
ax = plt.plot(xs, ys, c='k', marker='none', linestyle='--', alpha=0.6,
              label='line fit')
from scipy.optimize import curve_fit


def exponential(x, a, b, c):
    """Evaluate the model ``a * exp(-b * x) + c`` at ``x``.

    Parameters
    ----------
    x : scalar or array
        Point(s) at which the model is evaluated.
    a, b, c : float
        Amplitude, decay rate and constant offset of the model.
    """
    decay = np.exp(-b * x)
    return a * decay + c


# Grid on which the fitted curve is drawn.
x = np.linspace(-13, 2, 50)

# Fit the model with the log-density as the independent variable and the bin
# centre as the dependent variable.
popt, pcov = curve_fit(exponential, newloghist, newbcent)

# Data points, then the fitted curve labelled with its parameters.
ax = sns.scatterplot(
    x=newloghist,
    y=newbcent,
    marker='o',
    edgecolor='r',
    c='none',
    s=10,
    alpha=0.7,
    label='logplot hist',
)
ax = plt.plot(
    x,
    exponential(x, *popt),
    color='k',
    linestyle='--',
    label='curve fit: %.2f*exp(-%.2fx)+(%.2f)' % tuple(popt),
)
plt.legend()
print(popt)