Spaces:

karolmajek
/

maxdeeplab

Runtime error

App Files Files Community

maxdeeplab / data /build_dvps_data.py

karolmajek

from https://huggingface.co/spaces/akhaliq/deeplab2

0924f30 about 4 years ago

raw

history blame contribute delete

9.13 kB

	# coding=utf-8
	# Copyright 2021 The Deeplab2 Authors.
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.

	r"""Converts Depth-aware Video Panoptic Segmentation (DVPS) data to sharded TFRecord file format with tf.train.Example protos.

	The expected directory structure of the DVPS dataset should be as follows:

	+ DVPS_ROOT
	  + train | val
	    - ground-truth depth maps (*_depth.png)
	    - ground-truth panoptic maps (*_gtFine_instanceTrainIds.png)
	    - images (*_leftImg8bit.png)
	  + test
	    - images (*_leftImg8bit.png)

	The ground-truth panoptic map is encoded as the following in PNG format:

	panoptic ID = semantic ID * panoptic divisor (1000) + instance ID


	The output Example proto contains the following fields:

	image/encoded: encoded image content.
	image/filename: image filename.
	image/format: image file format.
	image/height: image height.
	image/width: image width.
	image/channels: image channels.
	image/segmentation/class/encoded: encoded panoptic segmentation content.
	image/segmentation/class/format: segmentation encoding format.
	image/depth/encoded: encoded depth content.
	image/depth/format: depth encoding format.
	video/sequence_id: sequence ID of the frame.
	video/frame_id: ID of the frame of the video sequence.
	next_image/encoded: encoded next-frame image content.
	next_image/segmentation/class/encoded: encoded panoptic segmentation content
	of the next frame.

	The output panoptic segmentation map stored in the Example will be the raw bytes
	of an int32 panoptic map, where each pixel is assigned to a panoptic ID:

	panoptic ID = semantic ID * panoptic divisor (1000) + instance ID

	where semantic ID will be the same as `category_id` for each segment, and
	the ignore label is used for pixels not belonging to any segment.

	The depth map will be the raw bytes of an int32 depth map, where each pixel is:

	depth map = depth ground truth * 256

	Example to run the script:

	python deeplab2/data/build_dvps_data.py \
	--dvps_root=${DVPS_ROOT} \
	--output_dir=${OUTPUT_DIR}
	"""

	import math
	import os

	from typing import Sequence, Tuple, Optional

	from absl import app
	from absl import flags
	from absl import logging
	import numpy as np

	from PIL import Image

	import tensorflow as tf

	from deeplab2.data import data_utils

	# Global absl flag registry; the values are read in `main`.
	FLAGS = flags.FLAGS

	# Root folder holding the per-split sub-folders that are globbed for images.
	flags.DEFINE_string('dvps_root', None, 'DVPS dataset root folder.')

	# Destination folder for the generated TFRecord shards.
	flags.DEFINE_string('output_dir', None,
	'Path to save converted TFRecord of TensorFlow examples.')

	# Format tag passed as both `label_format` and `depth_format` when an example
	# is built; the panoptic and depth maps are stored as raw int32 bytes.
	_PANOPTIC_DEPTH_FORMAT = 'raw'
	# Number of TFRecord files written for every dataset split.
	_NUM_SHARDS = 1000
	# Shard file name template, filled with (split name, shard index, shard count).
	_TF_RECORD_PATTERN = '%s-%05d-of-%05d.tfrecord'
	# File name suffixes that distinguish images, panoptic maps and depth maps
	# belonging to the same frame.
	_IMAGE_SUFFIX = '_leftImg8bit.png'
	_LABEL_SUFFIX = '_gtFine_instanceTrainIds.png'
	_DEPTH_SUFFIX = '_depth.png'


	def _get_image_info_from_path(image_path: str) -> Tuple[str, str]:
	"""Gets image info including sequence id and image id.

	Image path is in the format of '{sequence_id}_{image_id}_*.png',
	where `sequence_id` refers to the id of the video sequence, and `image_id` is
	the id of the image in the video sequence.

	Args:
	image_path: Absolute path of the image.

	Returns:
	sequence_id, and image_id as strings.
	"""
	image_path = os.path.basename(image_path)
	return tuple(image_path.split('_')[:2])


	def _get_images(dvps_root: str, dataset_split: str) -> Sequence[str]:
	  """Lists the image files of one dataset split.

	  Args:
	    dvps_root: String, path to DVPS dataset root folder.
	    dataset_split: String, dataset split ('train', 'val', 'test').

	  Returns:
	    A sorted list of the files under `dvps_root/dataset_split` whose names end
	    with the image suffix.
	  """
	  split_dir = os.path.join(dvps_root, dataset_split)
	  image_pattern = os.path.join(split_dir, '*' + _IMAGE_SUFFIX)
	  return sorted(tf.io.gfile.glob(image_pattern))


	def _decode_panoptic_or_depth_map(map_path: str) -> Optional[str]:
	"""Decodes the panoptic or depth map from encoded image file.

	Args:
	map_path: Path to the panoptic or depth map image file.

	Returns:
	Panoptic or depth map as an encoded int32 numpy array bytes or None if not
	existing.
	"""
	if not tf.io.gfile.exists(map_path):
	return None
	with tf.io.gfile.GFile(map_path, 'rb') as f:
	decoded_map = np.array(Image.open(f)).astype(np.int32)
	return decoded_map.tobytes()


	def _get_next_frame_path(image_path: str) -> Optional[str]:
	  """Gets the path of the next frame, or None if there is none.

	  The files are named {sequence_id}_{frame_id}*. To get the path of the next
	  frame, this function keeps sequence_id, increases frame_id by 1 (formatted
	  as a zero-padded 6-digit number) and keeps the directory and the suffix of
	  the input path.

	  Args:
	    image_path: String, path to an image or a panoptic map file.

	  Returns:
	    A string for the path of the next frame of the given path. None is
	    returned if that file does not exist (the given path is the last frame of
	    the sequence) or if the given path ends with neither the image suffix nor
	    the panoptic label suffix.
	  """
	  sequence_id, image_id = _get_image_info_from_path(image_path)
	  next_image_name = '{}_{:06d}'.format(sequence_id, int(image_id) + 1)
	  for suffix in (_IMAGE_SUFFIX, _LABEL_SUFFIX):
	    if not image_path.endswith(suffix):
	      continue
	    next_image_path = os.path.join(
	        os.path.dirname(image_path), next_image_name + suffix)
	    if tf.io.gfile.exists(next_image_path):
	      return next_image_path
	    return None
	  # No supported suffix matched: there is no candidate path to probe, so
	  # report "no next frame" instead of passing None to tf.io.gfile.exists.
	  return None


	def _create_tfexample(image_path: str, panoptic_map_path: str,
	                      depth_map_path: str) -> Optional[tf.train.Example]:
	  """Creates a TF example for an image together with its next frame.

	  Args:
	    image_path: Path to the image.
	    panoptic_map_path: Path to the panoptic map (as an image file).
	    depth_map_path: Path to the depth map (as an image file).

	  Returns:
	    TF example proto, or None if the image has no next frame (no example is
	    created for the last frame of a sequence).
	  """
	  # Look for the next frame before reading or decoding anything: without it
	  # no example is produced, so all other I/O would be wasted.
	  next_image_path = _get_next_frame_path(image_path)
	  if next_image_path is None:
	    return None

	  with tf.io.gfile.GFile(image_path, 'rb') as f:
	    image_data = f.read()
	  with tf.io.gfile.GFile(next_image_path, 'rb') as f:
	    next_image_data = f.read()

	  # Both decoders return None when the corresponding file does not exist.
	  label_data = _decode_panoptic_or_depth_map(panoptic_map_path)
	  depth_data = _decode_panoptic_or_depth_map(depth_map_path)

	  # The next panoptic map may be missing even though the next image exists;
	  # in that case the next-frame label is left as None instead of passing a
	  # None path to the decoder.
	  next_label_data = None
	  next_panoptic_map_path = _get_next_frame_path(panoptic_map_path)
	  if next_panoptic_map_path is not None:
	    next_label_data = _decode_panoptic_or_depth_map(next_panoptic_map_path)

	  image_name = os.path.basename(image_path)
	  # Use the real file extension so that additional dots in the file name do
	  # not change the reported image format.
	  image_format = os.path.splitext(image_name)[1][1:].lower()
	  sequence_id, frame_id = _get_image_info_from_path(image_path)

	  return data_utils.create_video_and_depth_tfexample(
	      image_data,
	      image_format,
	      image_name,
	      label_format=_PANOPTIC_DEPTH_FORMAT,
	      sequence_id=sequence_id,
	      image_id=frame_id,
	      label_data=label_data,
	      next_image_data=next_image_data,
	      next_label_data=next_label_data,
	      depth_data=depth_data,
	      depth_format=_PANOPTIC_DEPTH_FORMAT)


	def _convert_dataset(dvps_root: str, dataset_split: str, output_dir: str):
	  """Writes one dataset split as a fixed number of TFRecord shards.

	  The sorted image list of the split is cut into consecutive chunks, one per
	  shard. Every shard file is created, even when its chunk is empty. Frames for
	  which no example can be created are skipped.

	  Args:
	    dvps_root: String, path to DVPS dataset root folder.
	    dataset_split: String, the dataset split (e.g., train, val, test).
	    output_dir: String, directory to write output TFRecords to.
	  """
	  image_files = _get_images(dvps_root, dataset_split)
	  num_images = len(image_files)
	  num_per_shard = int(math.ceil(num_images / _NUM_SHARDS))

	  for shard_id in range(_NUM_SHARDS):
	    output_filename = os.path.join(
	        output_dir,
	        _TF_RECORD_PATTERN % (dataset_split, shard_id, _NUM_SHARDS))
	    first = shard_id * num_per_shard
	    last = min(first + num_per_shard, num_images)
	    with tf.io.TFRecordWriter(output_filename) as tfrecord_writer:
	      for image_path in image_files[first:last]:
	        # Label and depth files share the image's name up to the suffix.
	        example = _create_tfexample(
	            image_path,
	            image_path.replace(_IMAGE_SUFFIX, _LABEL_SUFFIX),
	            image_path.replace(_IMAGE_SUFFIX, _DEPTH_SUFFIX))
	        if example is None:
	          continue
	        tfrecord_writer.write(example.SerializeToString())


	def main(argv: Sequence[str]) -> None:
	  """Converts the train, val and test splits of DVPS to TFRecord files.

	  Args:
	    argv: Command-line arguments handed over by `app.run`; more than one
	      entry is rejected.

	  Raises:
	    app.UsageError: If `argv` holds more than one entry.
	  """
	  if len(argv) > 1:
	    raise app.UsageError('Too many command-line arguments.')

	  output_dir = FLAGS.output_dir
	  tf.io.gfile.makedirs(output_dir)

	  splits = ('train', 'val', 'test')
	  for split_name in splits:
	    logging.info('Starts to processing DVPS dataset split %s.', split_name)
	    _convert_dataset(FLAGS.dvps_root, split_name, output_dir)


	if __name__ == '__main__':
	  # Both flags default to None. Marking them as required makes a missing
	  # flag fail at start-up with a clear message instead of passing None on to
	  # the file-system calls in `main`.
	  flags.mark_flags_as_required(['dvps_root', 'output_dir'])
	  app.run(main)