#!/usr/bin/python3
import gzip  # May need to use gzip.open instead of open
import math
import os
import struct  # struct unpack allows some interpretation of python binary data
import sys

sys.path.append('/home/aschinde/workspace/projects_python/library')

import cv2
import numpy as np

#Example
##import struct
##
##data = open("from_fortran.bin", "rb").read()
##
##(eight, N) = struct.unpack("@II", data)
##
##This unpacks the first two fields, assuming they start at the very
##beginning of the file (no padding or extraneous data), and also assuming
##native byte-order (the @ symbol). The Is in the formatting string mean
##"unsigned integer, 32 bits".

#for integers
#a = int
#a.from_bytes(b'\xaf\xc2R',byteorder='little')
#a.to_bytes(nbytes,byteorder='big')
#analogous operation doesn't seem to exist for floats
#what about numpy?

#https://www.devdungeon.com/content/working-binary-data-python
#print("{:02d}".format(2))

#b = b.fromhex('010203040506')
#b.hex()
#c = b.decode(encoding='utf-8' or 'latin-1' or 'ascii'...)
#print(c)

#numpy arrays have tobytes
#numpy arrays have frombuffer (converts to dtypes)
#
#q = np.array([15],dtype=np.uint8);
#q.tobytes();
#q.tobytes(order='C') (options are 'C' and 'F'
#q2 = np.buffer(q.tobytes(),dtype=np.uint8)
#np.frombuffer(buffer,dtype=float,count=-1,offset=0)

##You could also use the < and > endianess format codes in the struct
##module to achieve the same result:
##
##>>> struct.pack('<2h', *struct.unpack('>2h', original))
##'\xde\xad\xc0\xde'

# Directory searched by read_training_data() / read_test_data() when the
# caller does not pass an explicit ``rootdir``.
_MNIST_ROOTDIR = '/home/aschinde/workspace/machinelearning/datasets/MNIST'


def bytereverse(bts):
    """Return ``bts`` unchanged.

    Despite its name this function performs no byte swapping: earlier
    attempts at reversing the buffer were abandoned once the readers below
    switched to ``struct.unpack('>I', ...)`` for the header fields.  It is
    kept as an identity function so that existing callers keep working.

    Args:
        bts: Any object (a ``bytes`` buffer in the original use).

    Returns:
        The very same object that was passed in.
    """
    return bts


def _read_uint32_be(fp):
    """Read one big-endian unsigned 32-bit integer from the open file ``fp``.

    Raises:
        struct.error: If fewer than 4 bytes are left in the file.
    """
    # The header integers are not in native byte order, hence the explicit
    # '>' (big-endian) format code.
    return struct.unpack('>I', fp.read(4))[0]


#Read Labels
def read_MNIST_label_file(fname):
    """Read a gzip-compressed MNIST label file.

    The file layout consumed here is: 4 bytes that are skipped (the magic
    number, which is not validated), a big-endian uint32 item count, then
    one unsigned byte per label.

    Args:
        fname: Path of the ``.gz`` label file.

    Returns:
        A writable 1-D ``numpy.uint8`` array of labels.  If the file holds
        fewer label bytes than its header announces, only the bytes that
        are actually present are returned (no error is raised).

    Raises:
        struct.error: If the header is shorter than 8 bytes.
        OSError: If the file cannot be opened or is not valid gzip data.
    """
    # The context manager closes the file even if parsing raises.
    with gzip.open(fname, 'rb') as fp:
        fp.read(4)  # magic number, ignored
        nitems = _read_uint32_be(fp)
        payload = fp.read(nitems)
    # frombuffer() on bytes yields a read-only view; copy so callers get a
    # normal writable array, like the one returned for images.
    return np.frombuffer(payload, dtype=np.uint8).copy()


def read_MNIST_image_file(fname):
    """Read a gzip-compressed MNIST image file.

    The file layout consumed here is: 4 bytes that are skipped (the magic
    number, which is not validated), three big-endian uint32 fields (image
    count, rows, columns), then ``count * rows * cols`` unsigned bytes of
    pixel data, stored image after image in row-major order.

    Args:
        fname: Path of the ``.gz`` image file.

    Returns:
        A writable ``numpy.uint8`` array of shape ``(count, rows, cols)``.

    Raises:
        ValueError: If the file holds fewer pixel bytes than the header
            announces.
        struct.error: If the header is shorter than 16 bytes.
        OSError: If the file cannot be opened or is not valid gzip data.
    """
    with gzip.open(fname, 'rb') as fp:
        fp.read(4)  # magic number, ignored
        # Keep the dimensions as Python ints so that their product cannot
        # overflow the way fixed-width numpy integers would.
        nitems = _read_uint32_be(fp)
        nrows = _read_uint32_be(fp)
        ncols = _read_uint32_be(fp)
        expected = nitems * nrows * ncols
        payload = fp.read(expected)

    if len(payload) != expected:
        raise ValueError(
            'truncated MNIST image file {!r}: expected {} pixel bytes, '
            'found {}'.format(fname, expected, len(payload)))

    # One reshape replaces the per-image Python loop; copy() turns the
    # read-only view over ``payload`` into an independent writable array.
    pixels = np.frombuffer(payload, dtype=np.uint8)
    return pixels.reshape((nitems, nrows, ncols)).copy()


def _read_MNIST_pair(rootdir, label_name, image_name):
    """Load the label and image files found in ``rootdir`` as ``[labels, images]``."""
    labels = read_MNIST_label_file(os.path.join(rootdir, label_name))
    images = read_MNIST_image_file(os.path.join(rootdir, image_name))
    return [labels, images]


def read_training_data(rootdir=_MNIST_ROOTDIR):
    """Load the MNIST training set.

    Args:
        rootdir: Directory containing ``train-labels-idx1-ubyte.gz`` and
            ``train-images-idx3-ubyte.gz``.  Defaults to the location that
            was previously hard-coded.

    Returns:
        A two-element list ``[labels, images]`` as produced by
        read_MNIST_label_file() and read_MNIST_image_file().
    """
    return _read_MNIST_pair(rootdir,
                            'train-labels-idx1-ubyte.gz',
                            'train-images-idx3-ubyte.gz')


def read_test_data(rootdir=_MNIST_ROOTDIR):
    """Load the MNIST test set.

    Args:
        rootdir: Directory containing ``t10k-labels-idx1-ubyte.gz`` and
            ``t10k-images-idx3-ubyte.gz``.  Defaults to the location that
            was previously hard-coded.

    Returns:
        A two-element list ``[labels, images]`` as produced by
        read_MNIST_label_file() and read_MNIST_image_file().
    """
    return _read_MNIST_pair(rootdir,
                            't10k-labels-idx1-ubyte.gz',
                            't10k-images-idx3-ubyte.gz')


def show_MNIST_image(img):
    """Display one image in a new matplotlib figure and block until closed.

    The pixel values are inverted (``255 - img``) before being drawn with
    the gray colormap.

    Args:
        img: 2-D array of pixel values; the readers above produce uint8.
    """
    # Imported lazily so that merely loading this module does not require
    # matplotlib.
    import matplotlib.pyplot as plt

    plt.figure()
    plt.imshow(255 - img, cmap='gray')
    plt.show()