master
Wojciech Janota 3 months ago
parent f7a81bf281
commit 89af341737

@@ -1,22 +1,46 @@
 from keras.models import Sequential, load_model
 from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
+from tensorflow.keras import layers
 import pandas as pd
+from tensorflow import keras
+import tensorflow as tf
+from tensorflow.keras import backend as K
+from tensorflow.keras.utils import register_keras_serializable
+
+NUM_THREADS = 14
+
+
+@register_keras_serializable()
+def custom_loss_function(y_true, y_pred):
+    # Reward large predicted coefficients: they quantize more DCT entries to 0,
+    # which yields a smaller file.
+    size_minimizer_part = (-K.sum(K.abs(y_pred))) / (64 * 255)
+    mse_original_jpeg_part = K.mean(K.square(y_pred - y_true))
+    size_minimizer_part = size_minimizer_part * mse_original_jpeg_part
+    size_coefficient = 0.3
+    quality_coefficient = 0.7
+    return abs(size_minimizer_part * size_coefficient + mse_original_jpeg_part * quality_coefficient)
+
+
 class NeuralNetworkEncoder:
-    def __init__(self, internal_activation_function: str, external_activation_function: str, optimizer: str, loss_function: str, image_dimension_x: int, image_dimension_y: int):
-        self.model = Sequential()
-        self.model.add(Conv2D(32, (3, 3), activation=internal_activation_function, input_shape=(image_dimension_x, image_dimension_y, 1)))
-        self.model.add(MaxPooling2D((2, 2)))
-        self.model.add(Conv2D(64, (3, 3), activation=internal_activation_function))
-        self.model.add(MaxPooling2D((2, 2)))
-        self.model.add(Flatten())
-        self.model.add(Dense(64, activation=internal_activation_function))
-        self.model.add(Dense((8, 8), activation=external_activation_function))
-        self.model.compile(optimizer=optimizer, loss=loss_function)
-
-    def __init__(self, pretrained_weights_path: str):
-        self.model = load_model(pretrained_weights_path)
+    def __init__(self, pretrained_weights_path: str = None, internal_activation_function: str = None, external_activation_function: str = None, optimizer: str = None, loss_function: str = None, image_dimension_x: int = None, image_dimension_y: int = None):
+        tf.config.threading.set_intra_op_parallelism_threads(NUM_THREADS)
+        tf.config.threading.set_inter_op_parallelism_threads(NUM_THREADS)
+        if pretrained_weights_path:
+            self.model = load_model(pretrained_weights_path, custom_objects={'custom_loss_function': custom_loss_function})
+        else:
+            # Input is a 512*512 flattened luma plane; reshape it back to 2-D for the conv stack.
+            self.model = keras.Sequential([
+                layers.Reshape((512, 512, 1), input_shape=(262144,)),
+                layers.Conv2D(32, (3, 3), activation=internal_activation_function, padding='same'),
+                layers.MaxPooling2D((2, 2)),
+                layers.Conv2D(64, (3, 3), activation=internal_activation_function, padding='same'),
+                layers.MaxPooling2D((2, 2)),
+                layers.Conv2D(128, (3, 3), activation=internal_activation_function, padding='same'),
+                layers.MaxPooling2D((2, 2)),
+                layers.Conv2D(256, (3, 3), activation=internal_activation_function, padding='same'),
+                layers.Flatten(),
+                # 64 outputs = one flattened 8x8 quantization table
+                layers.Dense(64, activation=external_activation_function)
+            ])
+            self.model.compile(optimizer=optimizer, loss=custom_loss_function)

     def train(self, train_imageset: pd.DataFrame, train_quantization_dataset: pd.DataFrame, validation_dataset: pd.DataFrame, validation_quantization_dataset: pd.DataFrame, epochs: int, batch_size: int):
         self.model.fit(train_imageset, train_quantization_dataset, validation_data=(validation_dataset, validation_quantization_dataset), epochs=epochs, batch_size=batch_size)

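For intuition, here is a minimal NumPy mirror of custom_loss_function (a sketch, not part of the commit; the reference table and prediction values are made up). y_true is a libjpeg quantization table flattened to 64 values and y_pred is the network's prediction; the size term grows in magnitude with larger quantization coefficients, while the MSE term keeps the prediction close to the reference:

import numpy as np

def custom_loss_reference(y_true, y_pred):
    size_part = -np.sum(np.abs(y_pred)) / (64 * 255)   # in [-1, 0] for 8-bit tables
    quality_part = np.mean((y_pred - y_true) ** 2)     # MSE against the reference table
    return abs(size_part * quality_part * 0.3 + quality_part * 0.7)

rng = np.random.default_rng(0)
y_true = np.full(64, 16.0)                    # hypothetical reference 8x8 table, flattened
y_pred = y_true + rng.uniform(-2.0, 2.0, 64)  # a slightly-off prediction
print(custom_loss_reference(y_true, y_pred))
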
@@ -1,4 +1,23 @@
 import click
+from longinus.utils.dataset import Dataset
+from longinus.coeff_encoders.neural_network import NeuralNetworkEncoder
+from longinus.utils.image_encoder import ImageEncoder
+from PIL import Image
+from longinus.utils.tools import psnr
+from rich.console import Console
+from rich.progress import Progress
+from sklearn.model_selection import train_test_split
+import traceback
+import pandas as pd
+import os
+import time
+import numpy as np
+import turbojpeg
+
+console = Console()

 @click.group()
 def cli():
@@ -6,20 +25,172 @@ def cli():
     pass

 @click.command()
-def reencode():
-    """Reencodes the image using custom coefficient calculating algorithm"""
-    click.echo("Test")
-    pass
+@click.option('-i', '--input-dataset-path', type=click.Path(exists=True), required=True)
+@click.option('-o', '--output-images-path', type=click.Path(exists=True), required=True)
+@click.option('-m', '--input-model-path', type=click.Path(exists=True), required=True)
+@click.option('-e', '--export-data-path', type=str, required=True)
+def bulk_bmp_reencode(input_dataset_path: str, input_model_path: str, output_images_path: str, export_data_path: str):
+    """Re-encodes every BMP in a dataset and records size, PSNR and timing statistics."""
+    psnr_table = []
+    with Progress() as progress:
+        task = progress.add_task("[bold yellow]Processing images from provided dataset...", total=len(os.listdir(input_dataset_path)))
+        for filename in os.listdir(input_dataset_path):
+            console.print(f"\n[yellow]Processing image: {filename}")
+            file = os.path.join(input_dataset_path, filename)
+            if not os.path.isfile(file):
+                continue
+            # Candidate encoder: predict a quantization table and write the JPEG with it
+            nn_start_time = time.perf_counter_ns()
+            input_image = ImageEncoder(file)
+            input_image.read()
+            data = pd.DataFrame([input_image.luma_array])
+            encoder = NeuralNetworkEncoder(pretrained_weights_path=input_model_path)
+            result_quant_table = encoder.predict_quantization_table(data).flatten()
+            output_image_path = os.path.join(output_images_path, filename.replace('.bmp', '.jpeg'))
+            jpeg_lib_output_image_path = os.path.join(output_images_path, f"jpeglib_{filename.replace('.bmp', '.jpeg')}")
+            turbojpeg_output_image_path = os.path.join(output_images_path, f"turbojpeg_{filename.replace('.bmp', '.jpeg')}")
+            output_image = ImageEncoder(output_image_path)
+            output_image.load_image_from_mem(input_image.luma_array_2d)
+            output_image.override_coefficients(result_quant_table)
+            output_image.write()
+            nn_end_time = time.perf_counter_ns()
+            test_output_image = ImageEncoder(output_image_path)
+            test_output_image.read()
+            # Baseline 1: Pillow's JPEG encoder
+            jpeglib_start_time = time.perf_counter_ns()
+            jpeglib_image = Image.open(file).convert('L')
+            jpeglib_image.save(jpeg_lib_output_image_path, "JPEG")
+            jpeglib_end_time = time.perf_counter_ns()
+            jpeglib_output_image = ImageEncoder(jpeg_lib_output_image_path)
+            jpeglib_output_image.read()
+            # Baseline 2: turbojpeg at quality 75, grayscale sampling
+            turbojpeg_start_time = time.perf_counter_ns()
+            turbojpeg_image = Image.open(file).convert('L')
+            turbojpeg_image_output = turbojpeg.compress(np.array(turbojpeg_image), 75, turbojpeg.SAMP.GRAY)
+            with open(turbojpeg_output_image_path, "wb") as turbojpeg_output_file:
+                turbojpeg_output_file.write(turbojpeg_image_output)
+            turbojpeg_stop_time = time.perf_counter_ns()
+            turbojpeg_image_new = ImageEncoder(turbojpeg_output_image_path)
+            turbojpeg_image_new.read()
+            calculated_psnr_original_neural_network = psnr(input_image.luma_array_2d, test_output_image.luma_array_2d)
+            calculated_psnr_original_jpeglib = psnr(input_image.luma_array_2d, jpeglib_output_image.luma_array_2d)
+            calculated_psnr_original_turbojpeg = psnr(input_image.luma_array_2d, turbojpeg_image_new.luma_array_2d)
+            result = {
+                'input_image': file,
+                'output_image': output_image_path,
+                'psnr_orig_to_nn': calculated_psnr_original_neural_network,
+                'input_size': os.path.getsize(file),
+                'output_size': os.path.getsize(output_image_path),
+                'jpeglib_size': os.path.getsize(jpeg_lib_output_image_path),
+                'turbojpeg_size': os.path.getsize(turbojpeg_output_image_path),
+                'psnr_orig_to_jpeglib': calculated_psnr_original_jpeglib,
+                'psnr_orig_to_turbojpeg': calculated_psnr_original_turbojpeg,
+                'compression_duration_nn': nn_end_time - nn_start_time,
+                'compression_duration_jpeglib': jpeglib_end_time - jpeglib_start_time,
+                'compression_duration_turbojpeg': turbojpeg_stop_time - turbojpeg_start_time
+            }
+            psnr_table.append(result)
+            progress.update(task, advance=1)
+    console.print("[bold green]Done!")
+    pd.DataFrame(psnr_table).to_csv(export_data_path)
+
+
+@click.command()
+@click.option('-i', '--input-image-path', type=click.Path(exists=True), required=True)
+@click.option('-m', '--input-model-path', type=click.Path(exists=True), required=True)
+def reencode(input_image_path: str, input_model_path: str):
+    """Re-encodes a BMP image using the custom coefficient-calculating algorithm."""
+    click.echo("Reencode")
+    input_image = ImageEncoder(input_image_path)
+    input_image.read()
+    data = pd.DataFrame([input_image.luma_array])
+    encoder = NeuralNetworkEncoder(pretrained_weights_path=input_model_path)
+    result_quant_table = encoder.predict_quantization_table(data).flatten()
+    print(result_quant_table)
+    output_image = ImageEncoder(f"{input_image_path}_converted.jpeg")
+    output_image.load_image_from_mem(input_image.luma_array_2d)
+    output_image.override_coefficients(result_quant_table)
+    output_image.write()
+

 @click.command()
-@click.argument('dataset_path', type=click.Path(exists=True), required=True)
-def train(dataset_path: str):
+@click.option('-p', 'dataset_path', type=click.Path(exists=True), required=True)
+@click.option('-o', 'output_model_path', type=str)
+@click.option('-i', 'input_model_path', type=click.Path(exists=True))
+def train(dataset_path: str, output_model_path: str, input_model_path: str):
     """Train the models using the dataset"""
     click.echo("Train")
-    pass
+    if not input_model_path:
+        encoder = NeuralNetworkEncoder(
+            internal_activation_function="relu",
+            external_activation_function="linear",
+            optimizer="adam",
+            loss_function="mean_squared_error",
+            image_dimension_x=512,
+            image_dimension_y=512
+        )
+    else:
+        # Resume from an existing model; without this branch `encoder` would be
+        # unbound on the first loop iteration.
+        encoder = NeuralNetworkEncoder(pretrained_weights_path=input_model_path)
+    dataset = Dataset(dataset_path=dataset_path)
+    dataset.load_prepared_dataset()
+    dataset_size = dataset.dataset_size
+    current_index = 0
+    # Walk the dataset in chunks of 100, re-loading the exported model between chunks.
+    for batch in range(100, dataset_size, 100):
+        if current_index > 0:
+            print("Loading training model...")
+            encoder = NeuralNetworkEncoder(pretrained_weights_path=output_model_path)
+        print(f"Extracting luma values for first index={current_index}")
+        extracted_luma_values = dataset.extract_luma_table(batch_size=100, first_item=current_index)
+        print(f"Extracting dct values for first index={current_index}")
+        extracted_dct_values = dataset.extract_dct_table(batch_size=100, first_item=current_index)
+        print("Splitting the dataset")
+        train_luma_df, temp_luma_df = train_test_split(extracted_luma_values, test_size=0.4, random_state=42)
+        validation_luma_df, test_luma_df = train_test_split(temp_luma_df, test_size=0.5, random_state=42)
+        train_dct_df, temp_dct_df = train_test_split(extracted_dct_values, test_size=0.4, random_state=42)
+        validation_dct_df, test_dct_df = train_test_split(temp_dct_df, test_size=0.5, random_state=42)
+        print("Training the model...")
+        encoder.train(
+            train_imageset=train_luma_df,
+            train_quantization_dataset=train_dct_df,
+            validation_dataset=validation_luma_df,
+            validation_quantization_dataset=validation_dct_df,
+            epochs=32,
+            batch_size=5
+        )
+        print("Exporting the model")
+        encoder.export_weights(output_model_path)
+        current_index = batch
+
+
+@click.command()
+@click.option('-p', 'dataset_path', type=click.Path(exists=True), required=True)
+def prepare_dataset(dataset_path: str):
+    """Crops the source images into 512x512 grayscale tiles under tmp_database."""
+    click.echo("Prepare dataset")
+    try:
+        dataset = Dataset(dataset_path=dataset_path)
+        with console.status("[bold yellow]Preparing the dataset for training..."):
+            dataset.prepare_dataset()
+        console.print("[bold green]Done! Processed files are now in directory: tmp_database")
+    except Exception:
+        console.print("[bold red]Error when preparing dataset")
+        traceback.print_exc()
+

 cli.add_command(reencode)
 cli.add_command(train)
+cli.add_command(prepare_dataset)
+cli.add_command(bulk_bmp_reencode)

 if __name__ == "__main__":
     cli()

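Two notes on the CLI above. First, the comparison relies on longinus.utils.tools.psnr, which this commit does not touch; a standard 8-bit PSNR implementation that it presumably matches looks like this sketch (the function name and signature are assumptions):

import numpy as np

def psnr_reference(original: np.ndarray, compressed: np.ndarray, peak: float = 255.0) -> float:
    # Peak signal-to-noise ratio in dB for 8-bit grayscale images
    mse = np.mean((original.astype(np.float64) - compressed.astype(np.float64)) ** 2)
    if mse == 0:
        return float("inf")  # identical images
    return 10.0 * np.log10(peak ** 2 / mse)

Second, the chunked loop in train stops one chunk early: range(100, dataset_size, 100) yields the start of each step, so the last up-to-100 items are never trained on. A quick trace with a hypothetical dataset_size of 350:

dataset_size = 350
current_index = 0
for batch in range(100, dataset_size, 100):   # yields 100, 200, 300
    print(f"training on items [{current_index}, {current_index + 100})")
    current_index = batch
# prints [0, 100), [100, 200), [200, 300); items 300..349 are never used
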
@@ -16,48 +16,77 @@ class Dataset():
     def prepare_dataset(self):
         counter = 0
         for filename in os.listdir(self.dataset_path):
+            file = os.path.join(self.dataset_path, filename)
             if os.path.isfile(file):
                 counter += 1
             else:
                 continue
-            file = os.path.join(self.dataset_path, filename)
             image = Image.open(file)
             for i in range(0, 20):
-                sub_x = random.randint(0, image.width - 512)
-                sub_y = random.randint(0, image.height - 512)
-                subimage = image.crop(sub_x, sub_x + 512, sub_y, sub_y + 512)
+                # Pick a random 512x512 tile; Image.crop takes one (left, upper, right, lower) box
+                sub_x = random.randint(0, image.width - 513)
+                sub_y = random.randint(0, image.height - 513)
+                cropbox = (sub_x, sub_y, sub_x + 512, sub_y + 512)
+                subimage = image.crop(cropbox)
                 new_filename = os.path.join('tmp_database', filename.split('.')[0] + f'_subimage_{i}.jpg')
                 subimage = subimage.convert('L')
+                if not os.path.exists('tmp_database'):
+                    os.makedirs('tmp_database')
                 subimage.save(new_filename)
                 self.dataset_filenames.append(new_filename)
+                print(new_filename)
         self.dataset_size = counter

     def load_prepared_dataset(self):
+        counter = 0
         for filename in os.listdir(self.dataset_path):
+            file = os.path.join(self.dataset_path, filename)
             if os.path.isfile(file):
                 counter += 1
             else:
                 continue
-            file = os.path.join(self.dataset_path, filename)
             self.dataset_filenames.append(file)
+        self.dataset_size = counter

     def __len__(self):
         return self.dataset_size

-    def extract_luma_table(self):
-        dataframe = pd.DataFrame()
+    def extract_luma_table(self, batch_size: int, first_item: int):
+        data = []
+        counter = 0
         for image_file in self.dataset_filenames:
+            if counter < first_item:
+                counter += 1
+                continue
+            if (counter - first_item) >= batch_size:  # stop after batch_size items
+                break
+            print(f"Processing image no={counter}")
             image = Image.open(image_file)
+            image = image.convert('L')
             image_luma = image.getdata()
             image_luma = np.array(image_luma)
-            dataframe = dataframe.add(image_luma)
+            data.append(image_luma.flatten())
+            counter += 1
+        print("Converting to dataframe")
+        dataframe = pd.DataFrame(data)
+        print("Done!")
         return dataframe

-    def extract_dct_table(self):
-        dataframe = pd.DataFrame()
+    def extract_dct_table(self, batch_size: int, first_item: int):
+        data = []
+        counter = 0
         for image_file in self.dataset_filenames:
+            if counter < first_item:
+                counter += 1
+                continue
+            if (counter - first_item) >= batch_size:  # stop after batch_size items
+                break
+            print(f"Processing image no={counter}")
             image = Image.open(image_file)
             image_dct = np.array(image.quantization[0])
-            dataframe = dataframe.add(image_dct)
+            data.append(image_dct.flatten())
+            counter += 1
+        print("Converting to dataframe")
+        dataframe = pd.DataFrame(data)
+        print("Done!")
         return dataframe

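A short standalone sketch of the two Pillow details this file depends on (paths are placeholders, not from the commit). Image.crop expects a single (left, upper, right, lower) box, which is exactly what the commit fixes in prepare_dataset, and JPEG files opened with Pillow expose their quantization tables through the quantization attribute, which is where extract_dct_table gets its training targets:

from PIL import Image
import numpy as np

# Cropping: Image.crop takes one (left, upper, right, lower) 4-tuple, not four ints
source = Image.open("source_photo.bmp")                   # hypothetical large source image
tile = source.crop((10, 20, 10 + 512, 20 + 512)).convert('L')

# Quantization tables: available on JPEG files only
jpg = Image.open("tmp_database/example_subimage_0.jpg")   # hypothetical prepared tile
quant = np.array(jpg.quantization[0])                     # luma table: 64 entries
print(tile.size, quant.shape)                             # (512, 512) (64,)
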
@@ -1,26 +0,0 @@
-from scipy import misc
-import numpy as np
-import scipy
-import cv2 as cv
-
-
-class Image:
-    def __init__(self, path):
-        self.path = path
-        self.bgr_array = None
-        self.dct_array = None
-
-    def read(self):
-        self.bgr_array = cv.imread(self.path, cv.IMREAD_GRAYSCALE)
-        self.dct_array = cv.dct(np.float32(self.bgr_array))
-
-    def write(self):
-        pass
-
-    def extract_coefficients(self):
-        return self.dct_array
-
-    def load_image_from_mem(self, image_array):
-        self.bgr_array = image_array
-
-    def override_coefficients(self, new_dct_array):
-        self.dct_array = new_dct_array

longinus/poetry.lock (generated, 1222 lines): diff suppressed because it is too large.

@@ -12,13 +12,18 @@ click = "^8.1.7"
 numpy = "^1.26.4"
 seaborn = "^0.13.2"
 pandas = "^2.2.1"
-pyturbojpeg = "^1.7.3"
-pillow = "^10.2.0"
+pillow = "^10.3.0"
 scipy = "^1.12.0"
 scikit-learn = "^1.4.1.post1"
 opencv-python = "^4.9.0.80"
 keras = "^3.3.3"
 keras-models = "^0.0.7"
+rich = "^13.7.1"
+torch = "^2.3.0"
+torchvision = "^0.18.0"
+scikit-image = "^0.23.2"
+tensorflow = "^2.16.1"
+turbojpeg = "^0.0.2"

 [build-system]
