import os
import sys

# Debug switch read by ffmpeg_reencode_video: when True, every ffmpeg command
# line plus its captured stdout/stderr is appended to thalassa_ffmpeg.log in
# the directory containing this file.
DEBUG_LOG_FFMPEG_VIDEO = False
#DEBUG_LOG_FFMPEG_VIDEO = True

# datestring utils
def string_to_timestamp(datetime_string):
	"""Convert a loosely ISO-like date/time string to an integer unix timestamp.

	Accepted shape: "YYYY[-MM[-DD]][(T| )HH[:MM[:SS[.ffffff]]]][Z[(+|-)HH[:MM]]]".
	Missing date pieces default to 1, missing time pieces to 0.

	When a "Z" is present the text after it is read as a UTC offset (nothing
	after the "Z" means UTC). Without a "Z" the value is a naive datetime, which
	datetime.timestamp() interprets in the local timezone.

	Raises ValueError for input that does not fit the shape above.
	"""
	import datetime
	datetime_string = datetime_string.strip()
	tzinfo = None
	if 'Z' in datetime_string:
		(datetime_string, _, tz) = datetime_string.partition('Z')
		(tz_hours, _, tz_minutes) = tz.strip().partition(':')
		# a bare "Z" carries no offset at all, which means UTC
		offset_hours = int(tz_hours) if tz_hours else 0
		offset_minutes = int(tz_minutes) if tz_minutes else 0
		# the sign is written on the hours only, but applies to the whole offset
		if tz_hours.startswith('-'):
			offset_minutes = -offset_minutes
		tzinfo = datetime.timezone(datetime.timedelta(hours=offset_hours, minutes=offset_minutes))

	# split date from time on "T", falling back to a space
	if 'T' in datetime_string:
		(date, _, time) = datetime_string.partition('T')
	else:
		(date, _, time) = datetime_string.partition(' ')
	time = time.strip()

	date_pieces = date.split('-')
	if 3 < len(date_pieces):
		raise ValueError('Unrecognized date "' + date + '"')
	(year, month, day) = date_pieces + ['1', '1', '1'][len(date_pieces):]

	(hour, minute, second) = ('0', '0', '0')
	microsecond = '0'
	if time:
		time_pieces = time.split(':')
		if 3 < len(time_pieces):
			raise ValueError('Unrecognized time "' + time + '"')
		(hour, minute, second) = time_pieces + ['0', '0', '0'][len(time_pieces):]
	if '.' in second:
		(second, _, fraction) = second.partition('.')
		# ".5" is half a second: scale the fraction to exactly six digits
		microsecond = (fraction + '000000')[:6]

	dt = datetime.datetime(
		int(year),
		int(month),
		int(day),
		int(hour),
		int(minute),
		int(second),
		int(microsecond),
		tzinfo=tzinfo
	)
	return int(dt.timestamp())

# ffmpeg utils (tested with ffmpeg 4)
def ffprobe_get_dimensions(file_path, ffprobe_path='ffprobe'):
	"""Ask ffprobe for the video stream dimensions of file_path.

	Returns a (width, height) tuple of strings exactly as printed by ffprobe.
	Raises ValueError when the output is not a single "WIDTHxHEIGHT" value.
	"""
	import subprocess
	command = [ffprobe_path, '-v', 'error']
	command += ['-select_streams', 'v']
	command += ['-show_entries', 'stream=width,height']
	# csv without the section prefix, fields separated by "x"
	command += ['-of', 'csv=p=0:s=x']
	command.append(file_path)
	probe = subprocess.run(command, capture_output=True)
	dimensions = probe.stdout.decode().strip()
	(width, height) = dimensions.split('x')
	return (width, height)

def ffmpeg_resize_to_longest_edge(
		source_file_path,
		destination_file_path,
		ffmpeg_path='ffmpeg',
		edge=128,
		jpg_quality=8,
		webp_quality=2
	):
	"""Write a single frame of the source, scaled down to fit a longest edge.

	The destination format is chosen from the last characters of
	destination_file_path ("jpg" or "webp"); any other ending returns False
	without running ffmpeg. Returns True when ffmpeg exits with code 0,
	otherwise False.
	"""
	import subprocess

	# scale filter: shrink only if the longest edge exceeds edge, keeping the
	# aspect ratio (commas are escaped for the ffmpeg filter expression)
	edge_text = str(edge)
	width_expression = 'if(lte(a\\,1)\\,min(' + edge_text + '\\,iw)\\,-2)'
	height_expression = 'if(lte(a\\,1)\\,-2\\,min(' + edge_text + '\\,ih))'
	scale = 'scale=' + width_expression + ':' + height_expression

	if destination_file_path[-3:] == 'jpg':
		#TODO something wrong with quality arguments on dreamhost
		#TODO maybe check ffmpeg version
		# ('-q:v' was the alternative flag considered for jpg)
		quality_value = str(int(jpg_quality))
	elif destination_file_path[-4:] == 'webp':
		quality_value = str(int(webp_quality))
	else:
		#TODO raise TypeError or something to alert of unsupported type
		return False
	quality_arg = '-quality'

	command = [ffmpeg_path, '-v', 'error']
	#TODO some shared hosting enforces single-threaded processes
	command += ['-threads', '1']
	command += ['-i', source_file_path]
	command += ['-vf', scale]
	command += ['-vsync', '0']
	command += ['-vframes', '1']
	command += [quality_arg, quality_value]
	command += [destination_file_path, '-y']
	completed = subprocess.run(command, capture_output=True)
	#TODO raise error to log on a non-zero return code
	return 0 == completed.returncode

def ffmpeg_create_thumbnail(
		source_file_path,
		destination_file_path,
		ffmpeg_path='ffmpeg',
		thumbnail_edge=128,
		thumbnail_jpg_quality=8,
		thumbnail_webp_quality=2
	):
	"""Create a thumbnail image by delegating to ffmpeg_resize_to_longest_edge.

	Returns whatever ffmpeg_resize_to_longest_edge returns (True or False).
	"""
	return ffmpeg_resize_to_longest_edge(
		source_file_path,
		destination_file_path,
		ffmpeg_path=ffmpeg_path,
		edge=thumbnail_edge,
		jpg_quality=thumbnail_jpg_quality,
		webp_quality=thumbnail_webp_quality
	)

def ffprobe_get_video_duration_ms(file_path, ffprobe_path='ffprobe'):
	"""Return the video duration of file_path in milliseconds, or -1 if unknown.

	First asks ffprobe for the stream duration. When that yields nothing usable
	the packet timestamps are probed and the last packet's time is used.
	"""
	import subprocess

	def first_usable(values):
		# first value that is neither empty nor ffprobe's "N/A" placeholder
		for value in values:
			value = value.strip()
			if value and 'N/A' != value:
				return value
		return None

	# stream duration probe
	res = subprocess.run(
		[
			ffprobe_path,
			'-v', 'error',
			'-select_streams', 'v',
			'-show_entries', 'stream=duration',
			'-of', 'csv=p=0',
			file_path
		],
		capture_output=True
	)
	# one line is printed per selected stream; use the first one with a value
	duration_s = first_usable(res.stdout.decode().split('\n'))

	# missing duration after stream duration probe, do stream packets probe
	if not duration_s:
		res = subprocess.run(
			[
				ffprobe_path,
				'-v', 'error',
				'-select_streams', 'v',
				'-show_entries', 'packet=pts_time,dts_time',
				'-of', 'compact=p=0:nk=1:s=x', # compact, nokeys, separator "x"
				# presumably limits reading to the tail of the file - TODO confirm
				'-read_intervals', '9999999',
				'-i', file_path,
			],
			capture_output=True
		)
		packets = [line.strip() for line in res.stdout.decode('utf-8').split('\n')]
		# discard empty lines so the real last packet is at the end of the list
		packets = [packet for packet in packets if packet]
		if packets:
			# NOTE(review): which of pts_time/dts_time ffprobe prints first is
			# not verified here; the first usable of the two fields wins
			duration_s = first_usable(packets[-1].split('x')[:2])

	#TODO missing duration after stream packets probe: try a stream tag duration
	# probe ('-show_entries', 'stream_tags=DURATION', '-of', 'csv=p=0:nk=1') and
	# format the resulting timestamp into seconds
	#TODO still missing: attempt to estimate duration by seeking the last
	# keyframe from the end of the file and doing some math based on frame length

	if not duration_s:
		return -1

	return int(round(float(duration_s) * 1000))

def ffprobe_get_gif_duration_frames(file_path, ffprobe_path='ffprobe'):
	"""Return the number of frames ffprobe counts in the first video stream.

	Raises ValueError when ffprobe prints nothing that parses as an integer.
	"""
	import subprocess
	command = [ffprobe_path, '-v', 'error']
	command += ['-select_streams', 'v:0']
	command.append('-count_frames')
	command += ['-show_entries', 'stream=nb_read_frames']
	command += ['-of', 'csv=p=0']
	command.append(file_path)
	probe = subprocess.run(command, capture_output=True)
	frame_count = probe.stdout.decode().strip()
	return int(frame_count)

def ffprobe_get_audio_duration_ms(file_path, ffprobe_path='ffprobe'):
	"""Return the audio stream duration of file_path in milliseconds.

	Raises ValueError when ffprobe's output does not parse as a float.
	"""
	import subprocess
	command = [ffprobe_path, '-v', 'error']
	command += ['-select_streams', 'a']
	command += ['-show_entries', 'stream=duration']
	command += ['-of', 'csv=p=0']
	command.append(file_path)
	probe = subprocess.run(command, capture_output=True)
	duration_s = float(probe.stdout.decode().strip())
	duration_ms = duration_s * 1000
	return int(round(duration_ms))

def ffmpeg_create_thumbnail_video_clip(
		source_file_path,
		destination_file_path,
		ffmpeg_path='ffmpeg',
		file_duration_ms=500,
		thumbnail_edge=128,
		thumbnail_video_clip_webm_quality=70,
		thumbnail_video_clip_duration_ms=3000):
	"""Re-encode a short muted clip of the source for use as a thumbnail.

	The clip is taken from the middle of the file, or covers the whole file when
	the file is not longer than the requested clip. Sources whose path ends in
	"gif" are passed on with end_ms=-1. Returns False when the requested clip
	duration is not positive, otherwise the result of ffmpeg_reencode_video.
	"""
	import subprocess

	# clips are disabled for a non-positive duration
	if not 0 < thumbnail_video_clip_duration_ms:
		return False

	if file_duration_ms <= thumbnail_video_clip_duration_ms:
		(start_ms, end_ms) = (0, file_duration_ms)
	else:
		# center the clip on the midpoint of the file
		start_ms = (file_duration_ms / 2) - (thumbnail_video_clip_duration_ms / 2)
		end_ms = start_ms + thumbnail_video_clip_duration_ms

	if source_file_path[-3:] == 'gif':
		end_ms = -1

	return ffmpeg_reencode_video(
		source_file_path,
		destination_file_path,
		ffmpeg_path,
		longest_edge=thumbnail_edge,
		start_ms=start_ms,
		end_ms=end_ms,
		webm_quality=thumbnail_video_clip_webm_quality,
		muted=True
	)

def ffmpeg_reencode_video(
		source_file_path,
		destination_file_path,
		ffmpeg_path='ffmpeg',
		longest_edge=128,
		start_ms=0,
		end_ms=0,
		webm_quality=70,
		muted=False
	):
	"""Re-encode a video with ffmpeg, optionally trimmed, muted and scaled down.

	Trimming only happens when end_ms is positive and greater than start_ms;
	otherwise the whole input is used. Returns True when ffmpeg exits with code
	0, otherwise False.
	"""
	import subprocess

	#TODO some shared hosting enforces single-threaded processes
	cmd = [ffmpeg_path, '-v', 'error', '-threads', '1']

	trim = 0 < end_ms and start_ms < end_ms
	if trim:
		start_s = start_ms / 1000
		end_s = end_ms / 1000
		cmd.extend(['-ss', str(start_s)])	# start timestamp
		cmd.extend(['-i', source_file_path])	# input file
		cmd.extend(['-t', str(end_s - start_s)])
	else:
		cmd.extend(['-i', source_file_path])

	if muted:
		cmd.append('-an')

	# scale filter: shrink only if the longest edge exceeds longest_edge, keeping
	# the aspect ratio (commas are escaped for the ffmpeg filter expression)
	edge_text = str(longest_edge)
	width_expression = 'if(lte(a\\,1)\\,min(' + edge_text + '\\,iw)\\,-2)'
	height_expression = 'if(lte(a\\,1)\\,-2\\,min(' + edge_text + '\\,ih))'
	scale = 'scale=' + width_expression + ':' + height_expression

	#TODO specifying codec ('-vcodec', 'libvpx') breaks for some ffmpeg?
	cmd.extend(['-quality', 'good'])
	#TODO '-cpu-used', '5'
	cmd.extend(['-vf', scale])
	cmd.extend(['-quality', str(webm_quality)])
	cmd.append(destination_file_path)

	res = subprocess.run(cmd, capture_output=True)

	#DEBUG ffmpeg video manipulation
	if DEBUG_LOG_FFMPEG_VIDEO:
		current_dir_path = os.path.dirname(os.path.realpath(__file__))
		with open(current_dir_path + '/thalassa_ffmpeg.log', 'a') as fh:
			for (label, content) in (('cmd: ', cmd), ('stdout: ', res.stdout), ('stderr: ', res.stderr)):
				fh.write(label + "\n" + str(content) + "\n")
	#DEBUG

	#TODO raise error to log on a non-zero return code
	return 0 == res.returncode

#TODO anywhere with exceptions do robust error logging
class Thalassa:
	version = '0.8.0'
	dir_path = os.path.dirname(os.path.realpath(__file__))

	def __init__(self, config_file_path='', provided_token=None):
		"""Load configuration and tokens, then authorize provided_token.

		config_file_path falls back to ".thalassa_config.json" next to this file.
		Raises ValueError when bcrypt cannot be imported; errors from get_config
		and get_active_tokens propagate.
		"""
		try:
			import bcrypt
		except Exception:
			raise ValueError('Missing module bcrypt')

		# vars
		self.config_file_path = config_file_path
		self.config = {}
		self.active_tokens = {}
		self.provided_token = provided_token
		self.authorized = False
		self.token_mode = ''
		# mimetypes grouped by category; the entries after each "special
		# mimetype?" marker are kept in their original position and order
		self.category_mimes = {}
		self.category_mimes['application'] = [
			'application/x-dosexec',
			'application/x-shockwave-flash',
			'application/pdf',
			# application special mimetype?
			'application/x-msdownload',
		]
		self.category_mimes['audio'] = [
			'audio/mpeg',
			'audio/x-wav',
			'audio/x-flac',
			'audio/midi',
			'audio/x-mod',
			# audio special mimetype?
			'audio/mp3',
			'audio/x-ms-wma',
			'audio/ogg',
			'audio/webm',
			'audio/3gpp',
			'audio/3gpp2',
			'audio/aac',
			'audio/ac3',
			'audio/x-aiff',
			'audio/aiff',
		]
		self.category_mimes['archive'] = [
			'application/zip',
			'application/x-gzip',
			'application/x-tar',
			'application/x-7z-compressed',
			'application/x-rar',
			'application/x-bzip',
			'application/x-bzip2',
			# archive special mimetype?
			'application/x-rar-compressed',
			'application/gzip',
			'application/x-zip',
			'application/x-zip-compressed',
			'application/s-compressed',
			'multipart/x-zip',
			'application/x-gtar',
			'application/x-gzip-compressed',
		]
		self.category_mimes['image'] = [
			'image/png',
			'image/jpeg',
			'image/gif',
			'image/svg+xml',
			# image special mimetype?
			'image/webp',
		]
		self.category_mimes['text'] = [
			'text/plain',
			'text/x-c++',
			# text special mimetype?
			'text/srt',
			'text/vtt',
			#TODO markdown gets its own category later maybe?
			'text/markdown',
			'text/x-markdown',
			#TODO html and other markup get their own category later maybe?
			'text/html',
		]
		self.category_mimes['video'] = [
			'video/webm',
			'video/mp4',
			'application/ogg',
			'video/x-ms-asf',
			'video/x-flv',
			'video/x-msvideo',
			'video/quicktime',
			'video/3gpp',
			# video special mimetype?
			'video/ogg',
			'video/mpeg',
			'video/3gpp2',
			'video/x-matroska',
			'video/x-ms-wmv',
			'video/avi',
			'application/x-troff-msvideo',
		]

		# config
		if not self.config_file_path:
			self.config_file_path = os.path.join(self.dir_path, '.thalassa_config.json')
		self.config = self.get_config(self.config_file_path)

		# tokens
		tokens_file_path = self.config['tokens_file_path']
		self.active_tokens = self.get_active_tokens(tokens_file_path)

		# a known token authorizes this instance and decides its mode
		token = self.provided_token
		if token and token in self.active_tokens:
			self.authorized = True
			self.token_mode = self.active_tokens[token]['mode']

	# log
	def log(self, log_line, verbose=False, *kwargs):
		"""Append log_line to the configured log file.

		Does nothing when config['enable_log'] is false. A verbose line is only
		written when config['verbose_log'] is true; otherwise a fixed notice is
		logged in its place.

		Raises ValueError('Problem writing to log') when the write fails, with
		the underlying error attached as the cause.
		"""
		if not self.config['enable_log']:
			return
		if verbose and not self.config['verbose_log']:
			# NOTE(review): this writes one notice per suppressed verbose line,
			# which looks like a debugging leftover - confirm it is wanted
			self.log('trying to verbose log when verbose log is false')
			return
		try:
			with open(self.config['log_file_path'], 'a') as fh:
				fh.write(log_line + "\n")
		except Exception as e:
			raise ValueError('Problem writing to log') from e

	# config
	@staticmethod
	def get_config(config_file_path):
		import json
		if not os.path.exists(config_file_path):
			raise OSError('Missing configuration')
		config = {}
		try:
			with open(config_file_path, 'r') as fh:
				config = json.load(fh)
		except Exception as e:
			raise ValueError('Problem getting configuration')
		return config

	# tokens
	@staticmethod
	def generate_token():
		import uuid
		return uuid.uuid4().hex

	def write_new_token(self, mode='viewer'):
		"""Create a token with the given mode and an empty name, persist all
		active tokens and return the new token."""
		self.log('writing new token', True)
		token = self.generate_token()
		self.active_tokens[token] = dict(mode=mode, name='')
		self.write_active_tokens()
		return token

	def write_active_tokens(self):
		"""Overwrite the tokens file with the current active tokens as JSON.

		Raises ValueError when the file cannot be written.
		"""
		import json
		self.log('writing active tokens', True)
		try:
			with open(self.config['tokens_file_path'], 'w') as tokens_file:
				json.dump(self.active_tokens, tokens_file)
		except Exception as error:
			self.log(str(error))
			raise ValueError('Problem writing tokens')

	def set_tokens_mode(self, tokens=None, mode='viewer'):
		"""Set the mode of every listed token and persist the tokens.

		Tokens that are not active are skipped and reported in a single log
		line. When the instance's own token is changed, token_mode follows.
		Does nothing when no tokens are given.
		"""
		self.log('setting tokens mode', True)
		if not tokens:
			return

		unfound_tokens = []
		for token in tokens:
			if token not in self.active_tokens:
				# unknown token: remember it for the log, there is nothing to update
				unfound_tokens.append(token)
				continue
			self.active_tokens[token]['mode'] = mode
			# changed own token
			if token == self.provided_token:
				self.token_mode = mode

		self.write_active_tokens()

		if unfound_tokens:
			log = 'While setting tokens mode "' + mode + '" encountered nonexistant tokens: '
			log += ''.join(unfound_token + ',' for unfound_token in unfound_tokens)
			self.log(log)

	def set_token_name(self, token, name):
		"""Rename an active token and persist the tokens; an unknown token is
		only logged."""
		self.log('setting token name', True)
		if token in self.active_tokens.keys():
			self.active_tokens[token]['name'] = name
			self.write_active_tokens()
			return
		self.log('Trying to set name of non-existant token "' + token + '" to "' + name + '"')

	def remove_tokens(self, tokens=None):
		"""Remove the listed tokens and persist the remaining ones.

		Removing the instance's own token revokes its authorization. If no token
		is left, a fresh one is generated so at least one stays active. Tokens
		that were not active are reported in a single log line. Does nothing when
		no tokens are given.
		"""
		self.log('removing tokens', True)
		if not tokens:
			return

		unfound_tokens = []
		for token in tokens:
			if token not in self.active_tokens:
				unfound_tokens.append(token)
			self.active_tokens.pop(token, None)
			# removed own token
			if token == self.provided_token:
				self.authorized = False
				self.token_mode = ''

		# if the last token was removed generate a fresh token so there's at least one active
		if not self.active_tokens:
			new_token = self.generate_token()
			# same record shape as write_new_token, so later ['mode'] and
			# ['name'] lookups on this token keep working
			self.active_tokens[new_token] = {'mode': 'viewer', 'name': ''}

		self.write_active_tokens()

		if unfound_tokens:
			log = 'While removing tokens encountered nonexistant tokens: '
			log += ''.join(unfound_token + ',' for unfound_token in unfound_tokens)
			self.log(log)

	def get_active_tokens(self, tokens_file_path):
		"""Load the active tokens from tokens_file_path.

		When the file is missing or holds no tokens, a new token is created
		through write_new_token and the instance's active tokens (which then
		contain it) are returned.

		Raises ValueError when the file exists but cannot be read or parsed.
		"""
		self.log('getting active tokens', True)
		import json
		active_tokens = {}
		if os.path.exists(tokens_file_path):
			try:
				with open(tokens_file_path, 'r') as fh:
					active_tokens = json.load(fh)
			except Exception as e:
				self.log(str(e))
				raise ValueError('Problem getting tokens')
		if not active_tokens:
			# write_new_token stores the token on self.active_tokens; return that
			# dict so the caller does not replace it with an empty one
			self.write_new_token()
			active_tokens = self.active_tokens
		return active_tokens

	# pass
	def is_first_pass_set(self):
		"""Return True when the pass hash file exists, otherwise False."""
		self.log('checking if first pass is set', True)
		pass_hash_file_path = self.config['pass_hash_file_path']
		return os.path.exists(pass_hash_file_path)

	@staticmethod
	def hash_pass(plain_pass):
		import bcrypt
		salt = bcrypt.gensalt()
		return bcrypt.hashpw(plain_pass.encode('utf-8'), salt).decode('utf-8')

	def get_current_pass_hash(self):
		"""Return the stored pass hash.

		Raises OSError when the hash file cannot be read. An empty hash file is
		deleted and reported as ValueError('Pass not set').
		"""
		self.log('getting current pass hash', True)
		try:
			with open(self.config['pass_hash_file_path'], 'r') as hash_file:
				stored_hash = hash_file.read()
		except Exception as error:
			self.log(str(error))
			raise OSError('Problem reading pass hash')
		if stored_hash:
			return stored_hash
		# an empty file means no pass was ever stored: remove the stale file
		os.remove(self.config['pass_hash_file_path'])
		self.log('Tried to get current pass hash but file contained empty string')
		raise ValueError('Pass not set')

	def write_pass_hash(self, pass_hash):
		"""Overwrite the pass hash file with pass_hash.

		Raises OSError when the file cannot be written.
		"""
		self.log('writing pass hash', True)
		try:
			with open(self.config['pass_hash_file_path'], 'w') as hash_file:
				hash_file.write(pass_hash)
		except Exception as error:
			self.log(str(error))
			raise OSError('Problem writing pass hash')

	def set_pass(self, new_pass='', new_pass_confirmation=''):
		"""Hash and store a new pass.

		Raises ValueError when no pass is given or the confirmation differs;
		errors from hashing or writing the hash propagate.
		"""
		self.log('setting pass', True)
		if not new_pass:
			self.log('Tried setting pass without entering a pass')
			raise ValueError('New pass not entered')
		if new_pass_confirmation != new_pass:
			self.log("Tried setting pass but confirmation pass didn't match")
			raise ValueError('New pass confirmation mismatch')
		new_pass_hash = self.hash_pass(new_pass)
		self.write_pass_hash(new_pass_hash)

	def is_correct_pass(self, plain_pass):
		"""Return whether plain_pass matches the stored bcrypt pass hash."""
		self.log('checking for correct pass', True)
		import bcrypt
		candidate = plain_pass.encode('utf-8')
		stored_hash = self.get_current_pass_hash().encode('utf-8')
		return bcrypt.checkpw(candidate, stored_hash)

	def change_pass(self, current_pass='', new_pass='', new_pass_confirmation=''):
		"""Replace the stored pass after verifying the current one.

		All validation problems (current pass and new pass) are collected and
		raised together as one ValueError with the messages joined by ", ".
		The new pass is only written when there are no problems at all.
		"""
		self.log('changing pass', True)
		errors = []
		try:
			if not current_pass:
				self.log('Tried changing pass without entering current pass')
				raise ValueError('Current pass not entered')
			if not self.is_correct_pass(current_pass):
				self.log('Tried changing pass but entered current pass was incorrect')
				raise ValueError('Incorrect pass')
		except Exception as e:
			errors.append(str(e))

		# validate the new pass here (same checks and messages as set_pass) so it
		# can be reported without being written
		if not new_pass:
			self.log('Tried setting pass without entering a pass')
			errors.append('New pass not entered')
		elif new_pass != new_pass_confirmation:
			self.log('Tried setting pass but confirmation pass didn\'t match')
			errors.append('New pass confirmation mismatch')

		# never store the new pass unless the current pass was verified
		if errors:
			raise ValueError(', '.join(errors))

		try:
			self.set_pass(new_pass, new_pass_confirmation)
		except Exception as e:
			raise ValueError(str(e))

	# record utils
	@staticmethod
	def tags_to_string(tags):
		if len(tags):
			return '#' + ' #'.join(tags)
		return ''

	@staticmethod
	def string_to_tags(tags_string, tags_separator='#'):
		if not tags_string:
			return []
		tags = tags_string.split(tags_separator)
		clean_tags = []
		for tag in tags:
			tag = tag.strip(' #,')
			if '' != tag:
				clean_tags.append(tag)
		return clean_tags

	def parse_search_tags(self, tags=None):
		"""Split search tags into file ids, filters and plain tags.

		Tags of the form "name:value" with a known name become filters (or file
		ids for "id:"); everything else is returned as a plain tag. For range
		filters given more than once the most restrictive value is kept. Time
		filters accept a unix timestamp of at least five digits or a string
		understood by string_to_timestamp.

		The tags argument is not modified. Returns (file_ids, filters, plain_tags).
		"""
		import re
		# range filters: name -> True when a larger value is more restrictive
		time_bounds = {
			'newer than': True,
			'older than': False,
		}
		int_bounds = {
			# file size
			'smaller than': False,
			'larger than': True,
			# width
			'wider than': True,
			'narrower than': False,
			# height
			'taller than': True,
			'shorter than': False,
			# duration
			'duration shorter than': False,
			'duration longer than': True,
		}
		# plain value filters: tag name -> key in the returned filters
		#TODO category, -category, mimetype, -mimetype
		text_filters = {
			'orientation': 'orientation',
			'mimetype': 'mimetype',
			'category': 'category',
			'owner': 'owner',
			'order': 'order by',
			'perpage': 'perpage',
		}
		file_ids = []
		filters = {}
		plain_tags = []

		def tighten(key, value, keep_larger):
			# keep only the most restrictive bound seen so far
			if key not in filters:
				filters[key] = value
			elif keep_larger and filters[key] < value:
				filters[key] = value
			elif not keep_larger and filters[key] > value:
				filters[key] = value

		for tag in (tags or []):
			if '' == tag:
				continue
			tag = tag.strip('#')
			(name, separator, value) = tag.partition(':')
			if not separator:
				plain_tags.append(tag)
			elif 'id' == name:
				file_ids.append(value)
			elif name in time_bounds:
				if re.match('^[0-9]*$', value) and 4 < len(value):
					value = int(value)
				else:
					value = string_to_timestamp(value)
				tighten(name, value, time_bounds[name])
			elif name in int_bounds:
				tighten(name, int(value), int_bounds[name])
			elif name in text_filters:
				filters[text_filters[name]] = value
			elif 'sort' == name:
				if value.lower() in ['asc', 'ascending']:
					filters['sort'] = 'ASC'
				else:
					filters['sort'] = 'DESC'
			else:
				plain_tags.append(tag)
		return (file_ids, filters, plain_tags)

	def parse_file_record_row(self, row):
		"""Build a file record dict from a row of file columns.

		The row is read by position: file_id, mimetype, extension, size, width,
		height, duration, upload_time, publish_time, owner. The category is
		derived from the mimetype; tags, thumbnails, cover and sets start empty.
		"""
		mimetype = row[1]
		file_record = dict(
			file_id=row[0],
			mimetype=mimetype,
			category=self.get_file_category(mimetype),
			extension=row[2],
			size=row[3],
			width=row[4],
			height=row[5],
			duration=row[6],
			upload_time=row[7],
			publish_time=row[8],
			owner=row[9],
		)
		# fields filled in later by other methods
		file_record.update(
			tags=[],
			thumbnail='',
			thumbnail_video_clip='',
			cover_file_id='',
			sets={},
		)
		return file_record

	def populate_file_record_thumbnails(self, file_record):
		"""Set file_record['thumbnail'] and ['thumbnail_video_clip'] in place.

		Each is the extension of the matching file in the "thumbnail" directory
		when that file exists, otherwise ''. The video clip is only looked up
		for mimetypes listed in the video category.
		"""
		thumbnail_directory = os.path.join(self.config['files_directory_path'], 'thumbnail')

		# thumbnail
		thumbnail_format = self.config['thumbnail_format']
		thumbnail_path = os.path.join(thumbnail_directory, file_record['file_id'] + '.' + thumbnail_format)
		file_record['thumbnail'] = thumbnail_format if os.path.exists(thumbnail_path) else ''

		#thumbnail video clip
		file_record['thumbnail_video_clip'] = ''
		if file_record['mimetype'] not in self.category_mimes['video']:
			return
		clip_path = os.path.join(thumbnail_directory, file_record['file_id'] + '.webm')
		if os.path.exists(clip_path):
			file_record['thumbnail_video_clip'] = 'webm'

	def populate_file_record_summaries(self, file_record):
		"""Set file_record['summary'] in place: the summary format when the
		file's summary exists in the "summary" directory, otherwise ''."""
		# summary
		summary_format = self.config['summary_format']
		summary_path = os.path.join(
			self.config['files_directory_path'],
			'summary',
			file_record['file_id'] + '.' + summary_format
		)
		file_record['summary'] = summary_format if os.path.exists(summary_path) else ''

	def print_file_records(self, file_ids_order, file_records):
		"""Print the file records as one JSON list, ordered as file_ids_order."""
		import json
		self.log('printing file records', True)
		ordered_records = [file_records[file_id] for file_id in file_ids_order]
		print(json.dumps(ordered_records))

	def get_file_category(self, mimetype):
		"""Return the category of a mimetype.

		An exact match in category_mimes wins; otherwise the first category
		whose name the mimetype starts with; otherwise 'unknown'.
		"""
		self.log('getting file category', True)
		exact_matches = [
			category
			for (category, mimetypes) in self.category_mimes.items()
			if mimetype in mimetypes
		]
		if exact_matches:
			return exact_matches[0]
		# fall back to the mimetype's own prefix, e.g. "image/..." -> "image"
		for category in self.category_mimes:
			if mimetype.startswith(category):
				return category
		return 'unknown'

	# database
	def create_tables(self):
		"""Create the files and tags tables when they do not exist yet.

		Raises ValueError when the database cannot be opened or a statement
		fails.
		"""
		import sqlite3
		self.log('creating tables', True)
		try:
			with sqlite3.connect(self.config['database_file_path']) as connection:
				# files
				files_table_sql = '''CREATE TABLE IF NOT EXISTS files (
					file_id TEXT PRIMARY KEY,
					upload_time INTEGER NOT NULL,
					publish_time INTEGER NOT NULL,
					extension TEXT NOT NULL,
					mimetype TEXT NOT NULL,
					size INTEGER NOT NULL,
					width INTEGER NOT NULL,
					height INTEGER NOT NULL,
					duration INTEGER NOT NULL,
					owner TEXT NOT NULL,
					thumbnail TEXT NOT NULL,
					summary TEXT NOT NULL,
					clip TEXT NOT NULL,
					UNIQUE(file_id) ON CONFLICT IGNORE
				);'''
				# tags
				tags_table_sql = '''CREATE TABLE IF NOT EXISTS tags (
					file_id TEXT NOT NULL,
					tag TEXT NOT NULL,
					UNIQUE(file_id, tag) ON CONFLICT IGNORE
				);'''
				cursor = connection.cursor()
				for statement in (files_table_sql, tags_table_sql):
					cursor.execute(statement)
				connection.commit()
		except Exception as error:
			self.log(str(error))
			raise ValueError('Problem creating tables')

	def initialize_database(self):
		"""Make sure the database file exists, then create its tables.

		Raises OSError when the database file cannot be created; errors from
		create_tables propagate.
		"""
		self.log('initializing database', True)
		database_exists = os.path.exists(self.config['database_file_path'])
		if not database_exists:
			try:
				# appending an empty string creates the file without content
				with open(self.config['database_file_path'], 'a') as database_file:
					database_file.write('')
			except Exception as error:
				self.log(str(error))
				raise OSError('Problem initializing database')
		self.create_tables()

	def create_file_records(self, file_records=None):
		"""Insert file records into the files table.

		Records whose file_id already exists (in the table or earlier in the
		same call) are skipped. The thumbnail, summary and clip columns are NOT
		NULL in the schema created by create_tables, so they are always written,
		as '' when the record does not carry a value for them.

		Returns the list of skipped (colliding) file ids. Raises KeyError when
		no records are given and ValueError when the database work fails.
		"""
		self.log('creating file records', True)
		if not file_records:
			raise KeyError('No file records provided')
		import sqlite3
		collision_file_ids = []
		try:
			with sqlite3.connect(self.config['database_file_path']) as connection:
				cursor = connection.cursor()
				# preflight check for existing ids
				file_ids = [file_record['file_id'] for file_record in file_records]
				placeholders = ','.join('?' for _ in file_ids)
				cursor.execute(
					'SELECT file_id FROM files WHERE file_id IN (' + placeholders + ');',
					file_ids
				)
				existing_file_ids = {row[0] for row in cursor.fetchall()}

				# insert
				sql = 'INSERT INTO files (file_id, upload_time, publish_time, extension, mimetype, size, width, height, duration, owner, thumbnail, summary, clip) VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?);'
				for file_record in file_records:
					if file_record['file_id'] in existing_file_ids:
						collision_file_ids.append(file_record['file_id'])
						continue
					cursor.execute(
						sql,
						(
							file_record['file_id'],
							file_record['upload_time'],
							file_record['publish_time'],
							file_record['extension'],
							file_record['mimetype'],
							file_record['size'],
							file_record['width'],
							file_record['height'],
							file_record['duration'],
							file_record['owner'],
							file_record.get('thumbnail') or '',
							file_record.get('summary') or '',
							file_record.get('clip') or ''
						)
					)
					# a repeat of this id later in the same call is a collision too
					existing_file_ids.add(file_record['file_id'])
				connection.commit()
		except Exception as e:
			self.log(str(e))
			raise ValueError('Problem creating file records')
		return collision_file_ids

	def add_files_tags(self, file_ids=[], tags=[]):
		"""Attach every tag to every file id.

		Spaces, "#" and "," are stripped from both ends of each tag and tags
		that end up empty are skipped. Raises KeyError when file ids or tags are
		missing and ValueError when the database work fails.
		"""
		self.log('adding files tags', True)
		#TODO consolidate statements into a single statement
		import sqlite3
		if not file_ids:
			raise KeyError('No file IDs provided')
		if not tags:
			raise KeyError('No tags provided')
		insert_sql = 'INSERT INTO tags (file_id, tag) VALUES (?,?);'
		try:
			with sqlite3.connect(self.config['database_file_path']) as connection:
				cursor = connection.cursor()
				for file_id in file_ids:
					clean_tags = (tag.strip(' #,') for tag in tags)
					for clean_tag in clean_tags:
						if not clean_tag:
							continue
						cursor.execute(insert_sql, (file_id, clean_tag))
				connection.commit()
		except Exception as error:
			self.log(str(error))
			raise ValueError('Problem adding files tags')

	def remove_files_tags(self, file_ids=[], tags=[]):
		"""Remove the given tags from the given files, or every tag of those
		files when no tags are given.

		Raises KeyError when no file ids are given and ValueError when the
		database work fails.
		"""
		self.log('removing files tags', True)
		#TODO consolidate statements into a single statement
		import sqlite3
		if not file_ids:
			raise KeyError('No file IDs provided')
		try:
			with sqlite3.connect(self.config['database_file_path']) as connection:
				cursor = connection.cursor()
				for file_id in file_ids:
					if tags:
						for tag in tags:
							cursor.execute('DELETE FROM tags where file_id = ? AND tag = ?;', (file_id, tag))
						continue
					cursor.execute('DELETE FROM tags where file_id = ?;', (file_id,))
			connection.commit()
		except Exception as error:
			self.log(str(error))
			raise ValueError('Problem removing files tags')

	def replace_files_tags(self, file_ids, tags=[]):
		"""Drop every tag of the given files, then attach the given tags (if any)."""
		self.log('replacing files tags', True)
		self.remove_files_tags(file_ids=file_ids)
		if not tags:
			return
		self.add_files_tags(file_ids=file_ids, tags=tags)

	def remove_tags(self, tags=[]):
		"""Delete the given tags from every file.

		Raises KeyError when no tags are given and ValueError when the database
		work fails.
		"""
		self.log('removing tags', True)
		#TODO consolidate statements into a single statement
		import sqlite3
		if not tags:
			self.log('Tried to remove tags without providing any tags')
			raise KeyError('No tags provided')
		delete_sql = 'DELETE FROM tags where tag = ?;'
		try:
			with sqlite3.connect(self.config['database_file_path']) as connection:
				cursor = connection.cursor()
				for tag in tags:
					cursor.execute(delete_sql, (tag,))
				connection.commit()
		except Exception as error:
			self.log(str(error))
			raise ValueError('Problem removing tags')

	def replace_tag(self, old_tag, new_tag):
		"""Rename old_tag to new_tag on every file that carries it.

		Raises KeyError when either tag is missing and ValueError when the
		database work fails.
		"""
		self.log('replacing tag', True)
		import sqlite3
		missing = (
			(old_tag, 'Tried to replace tag without providing source tag', 'No source tag provided'),
			(new_tag, 'Tried to replace tag without providing replacement tag', 'No replacement tag provided'),
		)
		for (value, log_line, message) in missing:
			if not value:
				self.log(log_line)
				raise KeyError(message)
		try:
			with sqlite3.connect(self.config['database_file_path']) as connection:
				cursor = connection.cursor()
				cursor.execute('UPDATE tags SET tag = ? WHERE tag = ?', (new_tag, old_tag))
				connection.commit()
		except Exception as error:
			self.log(str(error))
			raise ValueError('Problem replacing tag')

	#TODO
	def accompany_tag(self, tag, new_tag):
		"""Not implemented yet: logs the attempt and always raises ValueError."""
		#TODO
		log_line = 'accompanying tag (TODO)'
		self.log(log_line, True)
		raise ValueError('Problem accompanying tag')

	def generate_set(self, file_ids, sync=False):
		"""Group files into a set, or synchronize their existing sets.

		Without sync a new "set:<id>" tag is added to every file id and the new
		set id is returned. With sync, every set found on any of the files is
		added to each of those files that lacks it, and None is returned.
		Returns None without doing anything when no file ids are given.

		Raises ValueError when synchronizing fails.
		"""
		self.log('generating set', True)
		if not file_ids:
			self.log('Tried generating set without providing file IDs')
			return
		if not sync:
			import uuid
			new_set_id = uuid.uuid4().hex
			self.add_files_tags(file_ids, tags=['set:' + new_set_id])
			return new_set_id
		# synchronize sets among the provided file ids
		import sqlite3
		try:
			with sqlite3.connect(self.config['database_file_path']) as connection:
				cursor = connection.cursor()
				placeholders = ','.join('?' for _ in file_ids)
				cursor.execute(
					'SELECT file_id, tag FROM tags WHERE tag LIKE ? AND file_id IN (' + placeholders + ');',
					['set:%'] + list(file_ids)
				)
				file_sets = {}
				sets = []
				for (file_id, tag) in cursor.fetchall():
					# drop the "set:" prefix and anything after the last ":"
					set_id = tag[4:]
					if ':' in set_id:
						set_id = set_id[:set_id.rfind(':')]
					file_set_ids = file_sets.setdefault(file_id, [])
					if set_id not in file_set_ids:
						file_set_ids.append(set_id)
					if set_id not in sets:
						sets.append(set_id)
				# NOTE(review): files that carry no set tag at all never appear
				# in file_sets and so receive nothing - confirm this is intended
				missing = [
					(file_id, 'set:' + set_id)
					for set_id in sets
					for (file_id, file_set_ids) in file_sets.items()
					if set_id not in file_set_ids
				]
				# nothing to add when the sets are already in sync
				if missing:
					cursor.executemany('INSERT INTO tags (file_id, tag) VALUES (?, ?);', missing)
				connection.commit()
		except Exception as e:
			self.log(str(e))
			raise ValueError('Problem synchronizing sets')

	def get_tags_list(self, exclude_tags=None, exclude_future=False, include_meta=False):
		"""Return the known tags as a list of (tag, count) tuples, most used first.

		exclude_tags drops those tags from the list. exclude_future ignores
		files whose publish_time lies in the future. include_meta appends the
		search meta tags (orientation, order, sort, plus a mimetype and category
		tag for the mimetypes present in the files table), each with count 0.

		Raises ValueError when the database work fails.
		"""
		self.log('getting tags list', True)
		#TODO should exclude any tags that are on file_ids with excluded tags? or have another argument for exclude file_ids with tags
		import sqlite3
		import time
		current_time = int(time.time())
		exclude_tags = list(exclude_tags or [])

		try:
			with sqlite3.connect(self.config['database_file_path']) as connection:
				cursor = connection.cursor()
				conditions = ['tag <> ?' for _ in exclude_tags] or ['1']
				values = list(exclude_tags)
				if exclude_future:
					conditions.append('file_id NOT IN (SELECT file_id FROM files WHERE publish_time > ?)')
					values.append(current_time)
				sql = 'SELECT tag, COUNT(*) as count FROM tags WHERE ' + ' AND '.join(conditions)
				sql += ' GROUP BY tag ORDER BY count DESC;'
				cursor.execute(sql, values)
				tags = [(row[0], row[1]) for row in cursor.fetchall()]

				if include_meta:
					# meta tags use the same (tag, count) shape as every other entry
					meta_tags = [
						'orientation:portrait', 'orientation:landscape',
						'order by:upload', 'order by:publish', 'order by:size', 'order by:width', 'order by:height', 'order by:duration',
						'sort:ascending', 'sort:descending']
					tags += [(meta_tag, 0) for meta_tag in meta_tags]

					conditions = []
					values = list(exclude_tags)
					if exclude_tags:
						excluded = ' OR '.join('tag == ?' for _ in exclude_tags)
						conditions.append('file_id NOT IN (SELECT file_id FROM tags WHERE ' + excluded + ')')
					else:
						conditions.append('1')
					if exclude_future:
						# "<=" keeps exactly the files the tag query above keeps
						conditions.append('publish_time <= ?')
						values.append(current_time)
					sql = 'SELECT DISTINCT mimetype FROM files WHERE ' + ' AND '.join(conditions) + ';'
					cursor.execute(sql, values)
					mimetypes = [row[0] for row in cursor.fetchall()]

					mimetype_to_category = {}
					for (category, category_mimetypes) in self.category_mimes.items():
						for category_mimetype in category_mimetypes:
							mimetype_to_category[category_mimetype] = category

					already_added_categories = []
					for mimetype in mimetypes:
						tags.append(('mimetype:' + mimetype, 0))
						if mimetype not in mimetype_to_category:
							continue
						category = mimetype_to_category[mimetype]
						if category not in already_added_categories:
							tags.append(('category:' + category, 0))
							already_added_categories.append(category)
				return tags
		except Exception as e:
			self.log(str(e))
			raise ValueError('Problem getting tags list')

	def cleanup_orphan_tags(self):
		"""Delete every tag row whose file_id has no matching row in the files table.

		Raises:
			ValueError: if the database operation fails
		"""
		self.log('cleaning up orphan tags', True)
		import sqlite3
		delete_sql = 'DELETE FROM tags WHERE file_id not in (SELECT file_id FROM files WHERE 1)'
		try:
			with sqlite3.connect(self.config['database_file_path']) as db:
				db.cursor().execute(delete_sql)
				db.commit()
		except Exception as error:
			self.log(str(error))
			raise ValueError('Problem cleaning up orphan tags')

	def get_sets_file_records(self, cursor, file_records, file_ids_order, set_ids, sets):
		"""Load the records and tags of files that belong to the given sets.

		Files whose id is already in file_ids_order are skipped. Every newly
		found file gets a parsed record with thumbnails and summaries populated,
		is stored in file_records and appended to file_ids_order. The tags of
		the new files are then read, attached to their records and passed to
		check_for_set_tag together with set_ids and sets.

		Parameters:
			cursor: open sqlite3 cursor used for both queries
			file_records: dict of file_id -> file record, updated in place
			file_ids_order: list of already loaded file ids, updated in place
			set_ids: list of set ids to look up
			sets: dict handed through to check_for_set_tag
		"""
		self.log('getting sets file records', True)
		if 0 == len(set_ids):
			return
		# one LIKE pattern per distinct set id; the id is escaped so that
		# '\', '%' and '_' inside it are matched literally
		patterns = []
		for set_id in set_ids:
			escaped_set_id = set_id.replace('\\', '\\\\').replace('%', '\\%').replace('_', '\\_')
			pattern = 'set:' + escaped_set_id + '%'
			if pattern not in patterns:
				patterns.append(pattern)
		# NOTE(review): the trailing % is needed to match 'set:<id>:<weight>' but
		# also matches longer set ids that merely start with <id>
		sql = 'SELECT file_id, mimetype, extension, size, width, height, duration, upload_time, publish_time, owner FROM files WHERE file_id in '
		sql += '(SELECT file_id from tags WHERE '
		sql += ' OR '.join(['tag LIKE ? ESCAPE ?'] * len(patterns)) + ');'
		values = []
		for pattern in patterns:
			values.append(pattern)
			values.append('\\')
		cursor.execute(sql, values)
		rows = cursor.fetchall()
		new_file_ids = []
		self.log('checking for set tags on ' + str(len(rows)) + ' files', True)
		for row in rows:
			file_id = row[0]
			# skip if already got file record
			if file_id in file_ids_order:
				continue
			set_file_record = self.parse_file_record_row(row)
			self.populate_file_record_thumbnails(set_file_record)
			self.populate_file_record_summaries(set_file_record)
			new_file_ids.append(set_file_record['file_id'])
			file_records[set_file_record['file_id']] = set_file_record
			# also tracked in file_ids_order, which is what the "already got
			# file record" checks look at
			file_ids_order.append(set_file_record['file_id'])
		# get tags for set file records
		if 0 == len(new_file_ids):
			return
		sql = 'SELECT file_id, tag FROM tags WHERE file_id IN ('
		sql += ','.join(['?'] * len(new_file_ids)) + ');'
		cursor.execute(sql, new_file_ids)
		rows = cursor.fetchall()
		self.log('total tags being checked: ' + str(len(rows)), True)
		for row in rows:
			(file_id, tag) = row
			file_records[file_id]['tags'].append(tag)
			self.check_for_set_tag(file_id, tag, set_ids, sets)

	def get_covers_file_records(self, cursor, file_records, file_ids_order, cover_file_ids):
		"""Load the records of cover files that have not been loaded yet.

		Cover ids already present in file_ids_order are ignored. Each remaining
		cover found in the files table gets a parsed record with thumbnails and
		summaries populated, is stored in file_records and appended to
		file_ids_order.

		Parameters:
			cursor: open sqlite3 cursor used for the query
			file_records: dict of file_id -> file record, updated in place
			file_ids_order: list of already loaded file ids, updated in place
			cover_file_ids: ids of the cover files to load
		"""
		self.log('getting covers files records', True)
		if len(cover_file_ids) == 0:
			return
		# only ask the database for covers we do not hold a record for yet
		missing_ids = [candidate for candidate in cover_file_ids if candidate not in file_ids_order]
		if not missing_ids:
			return
		query = (
			'SELECT  file_id, mimetype, extension, size, width, height, duration, upload_time, publish_time, owner FROM files WHERE file_id IN ('
			+ ','.join('?' for _ in missing_ids)
			+ ');'
		)
		cursor.execute(query, missing_ids)
		for found in cursor.fetchall():
			record = self.parse_file_record_row(found)
			self.populate_file_record_thumbnails(record)
			self.populate_file_record_summaries(record)
			file_records[record['file_id']] = record
			file_ids_order.append(record['file_id'])

	def check_for_set_tag(self, file_id, tag, set_ids, sets):
		"""Register a file's set membership when the tag is a 'set:' tag.

		A set tag looks like 'set:<set id>' or 'set:<set id>:<weight>'. The set
		id is appended to set_ids if it is new, and (file_id, weight) is
		appended to sets[set id]. Whatever follows the last colon is treated
		as the weight; when it is not an integer the problem is logged and the
		weight falls back to 0. Tags without the 'set:' prefix are ignored.
		"""
		if 'set:' != tag[:4]:
			return
		# split on the LAST colon so set ids may themselves contain colons
		(set_name, separator, weight_text) = tag[4:].rpartition(':')
		weight = 0
		if separator:
			try:
				weight = int(weight_text)
			except Exception as error:
				self.log('Non-integer set weight on file: ' + file_id)
				self.log(str(error))
				# a non-integer weight is ignored rather than treated as fatal
				weight = 0
		else:
			# no colon at all: the whole remainder is the set id
			set_name = weight_text
		if set_name not in set_ids:
			set_ids.append(set_name)
		if set_name not in sets.keys():
			sets[set_name] = []
		sets[set_name].append((file_id, weight))

	def populate_file_record_sets(self, file_records, set_ids, sets):
		"""Fill each file record's 'sets' entry with the ordered member ids of its sets.

		Within a set, files without a positive weight come first, ordered by
		publish_time and then by file id. Files with a positive weight follow,
		ordered by weight and then by file id, so files sharing a weight are all
		kept. Members that have no entry in file_records are left out.

		Parameters:
			file_records: dict of file_id -> file record; the 'sets' dict of
				every record that belongs to a set is replaced
			set_ids: not used by this method
			sets: dict of set id -> list of (file_id, weight) tuples
		"""
		self.log('populating file record sets', True)
		ordered_sets = {}
		file_ids = []
		for (set_id, file_ids_weights) in sets.items():
			set_file_ids_by_time = {}
			set_file_ids_by_weight = {}
			for (file_id, weight) in file_ids_weights:
				if file_id not in file_records:
					continue
				file_record = file_records[file_id]
				# mark membership so the second pass knows which sets to attach
				if set_id not in file_record['sets']:
					file_record['sets'][set_id] = []
				if 0 < weight:
					# group by weight instead of overwriting, so that files
					# sharing a weight are not dropped from the set
					set_file_ids_by_weight.setdefault(weight, []).append(file_id)
				else:
					set_file_ids_by_time.setdefault(file_record['publish_time'], []).append(file_id)
				if file_id not in file_ids:
					file_ids.append(file_id)

			ordered_set = []
			for grouped_file_ids in (set_file_ids_by_time, set_file_ids_by_weight):
				for key in sorted(grouped_file_ids):
					# for the same publish time or weight, sort alphabetically by file id
					ordered_set.extend(sorted(grouped_file_ids[key]))
			ordered_sets[set_id] = ordered_set
		for file_id in file_ids:
			file_record = file_records[file_id]
			member_set_ids = [set_id for set_id in file_record['sets'] if set_id in ordered_sets]
			file_record['sets'] = {}
			for set_id in member_set_ids:
				file_record['sets'][set_id] = ordered_sets[set_id]

	def search_file_records(self, file_ids=[], filters={}, tags=[], page=-1):
		"""Search file records by id, filters and tags and return one page of results.

		Parameters:
			file_ids: when non-empty, only these file ids are considered
			filters: dict of optional filters. Comparison filters: 'newer than',
				'older than', 'smaller than', 'larger than', 'wider than',
				'narrower than', 'taller than', 'shorter than',
				'duration shorter than', 'duration longer than'. Exact filters:
				'orientation' ('portrait' or 'landscape'), 'mimetype',
				'category' (a key of self.category_mimes), 'owner'. Result
				shaping: 'order by' ('upload', 'publish', 'size', 'height',
				'width', 'duration'), 'sort' ('asc'/'ascending' or
				'desc'/'descending', case-insensitive, anything else keeps the
				default descending order) and 'perpage'. The dict is not modified.
			tags: up to 100 tags (extra ones are ignored), matched
				case-insensitively. 'tag' must be present, '-tag' must be absent,
				'~text' must be contained in some tag and '-~text' must not be
				contained in any tag.
			page: zero-based page number; -1 returns up to 999 results unpaginated

		Returns:
			(total_results, total_pages, total_size, perpage, file_ids_order,
			file_records, results_this_page). file_ids_order starts with the
			results of this page; file ids loaded through
			get_sets_file_records and get_covers_file_records are appended
			after them.

		Raises:
			ValueError: if anything goes wrong while searching
		"""
		self.log('searching file records', True)
		import sqlite3
		import math

		total_results = 0
		total_pages = 0
		perpage = int(self.config['default_files_per_page'])
		results_this_page = 0
		file_ids_order = []
		file_records = {}

		# filter name -> parameterized condition, in the order they are applied
		comparison_filters = [
			# publish time
			('newer than', 'publish_time > ?'),
			('older than', 'publish_time < ?'),
			# file size
			('smaller than', 'size < ?'),
			('larger than', 'size > ?'),
			# width
			('wider than', 'width > ?'),
			('narrower than', 'width < ?'),
			# height
			('taller than', 'height > ?'),
			('shorter than', 'height < ?'),
			# duration
			('duration shorter than', 'duration < ?'),
			('duration longer than', 'duration > ?'),
		]
		# the sort direction ends up in the SQL text, so only known values are let through
		sort_directions = {'asc': 'ASC', 'ascending': 'ASC', 'desc': 'DESC', 'descending': 'DESC'}

		try:
			with sqlite3.connect(self.config['database_file_path']) as connection:
				values = []
				cursor = connection.cursor()
				if not file_ids:
					sql = '1'
				else:
					placeholders = []
					for file_id in file_ids:
						placeholders.append('?')
						values.append(file_id)
					sql = 'file_id IN (' + ','.join(placeholders) + ')'

				order_by = 'publish_time'
				sort = 'DESC'

				# filters
				if filters:
					for (filter_name, condition) in comparison_filters:
						if filter_name in filters:
							sql += ' AND ' + condition
							values.append(filters[filter_name])
					# orientation
					if 'orientation' in filters:
						if 'portrait' == filters['orientation']:
							sql += ' AND width < height'
						elif 'landscape' == filters['orientation']:
							sql += ' AND height < width'
					# mimetype
					if 'mimetype' in filters:
						sql += ' AND mimetype = ?'
						values.append(filters['mimetype'])
					# category
					if 'category' in filters and filters['category'] in self.category_mimes:
						category_mimetypes = list(self.category_mimes[filters['category']])
						if category_mimetypes:
							sql += ' AND (' + ' OR '.join(['mimetype = ?'] * len(category_mimetypes)) + ')'
							values.extend(category_mimetypes)
						else:
							# a category without mimetypes cannot match any file
							sql += ' AND 0'
					# owner
					if 'owner' in filters:
						sql += ' AND owner = ?'
						values.append(filters['owner'])
					# order by, sort, and pagination
					if 'order by' in filters:
						requested_order = filters['order by']
						# map to a column name locally instead of rewriting the caller's dict
						if requested_order in ['upload', 'publish']:
							order_by = requested_order + '_time'
						elif requested_order in ['size', 'height', 'width', 'duration']:
							order_by = requested_order
					if 'sort' in filters:
						sort = sort_directions.get(str(filters['sort']).strip().lower(), sort)
					if 'perpage' in filters:
						perpage = int(filters['perpage'])

				# tags
				if tags:
					# hard limit on tags
					if 100 < len(tags):
						tags = tags[:100]
					plain_tags = []
					negation_plain_tags = []
					partial_tags = []
					negation_partial_tags = []
					for tag in tags:
						tag = tag.strip()
						if '~' == tag[:1]:
							partial_tags.append('%' + tag[1:].replace('_', '\\_') + '%')
						elif '-~' == tag[:2]:
							negation_partial_tags.append('%' + tag[2:].replace('_', '\\_') + '%')
						elif '-' == tag[:1]:
							negation_plain_tags.append(tag[1:])
						elif tag:
							plain_tags.append(tag)

					# case insensitivity: both sides are lowered inside the query
					tag_col = 'LOWER(tag)'
					tag_value = 'LOWER(?)'
					for tag in plain_tags:
						sql += ' AND file_id IN (SELECT file_id FROM tags WHERE ' + tag_col + ' == ' + tag_value + ')'
						values.append(tag)
					for tag in negation_plain_tags:
						sql += ' AND file_id NOT IN (SELECT file_id FROM tags WHERE ' + tag_col + ' == ' + tag_value + ')'
						values.append(tag)
					for tag in partial_tags:
						# the ESCAPE placeholder has to be in the SQL because its value is bound below
						sql += ' AND file_id IN (SELECT file_id FROM tags WHERE ' + tag_col + ' LIKE ' + tag_value + ' ESCAPE ?)'
						values.append(tag)
						values.append('\\')
					for tag in negation_partial_tags:
						sql += ' AND file_id NOT IN (SELECT file_id FROM tags WHERE ' + tag_col + ' LIKE ' + tag_value + ' ESCAPE ?)'
						values.append(tag)
						values.append('\\')

				# count total possible results
				cursor.execute('SELECT COUNT(file_id), SUM(size) FROM files WHERE ' + sql + ';', values)
				(total_results, total_size) = cursor.fetchone()
				total_pages = math.ceil(total_results / perpage)

				if 0 == total_results:
					connection.commit()
					return (0, 0, 0, perpage, [], {}, 0)

				# get requested page of results
				sql = 'SELECT file_id, mimetype, extension, size, width, height, duration, upload_time, publish_time, owner FROM files WHERE ' + sql + ' ORDER BY ' + order_by + ' ' + sort + ', file_id ' + sort
				if -1 != page:
					sql += ' LIMIT ' + str(perpage) + ' OFFSET ' + str(int(perpage) * int(page))
				else:
					# hardcoded max limit of non-paginated results
					sql += ' LIMIT 999'
				sql += ';'
				cursor.execute(sql, values)
				rows = cursor.fetchall()
				# compose file records and get tags
				self.log('checking for set tags on ' + str(len(rows)) + ' files', True)
				tag_rows = []
				if rows:
					for row in rows:
						file_record = self.parse_file_record_row(row)
						self.populate_file_record_thumbnails(file_record)
						self.populate_file_record_summaries(file_record)
						file_records[file_record['file_id']] = file_record
						file_ids_order.append(file_record['file_id'])
						results_this_page += 1
					sql = 'SELECT file_id, tag FROM tags WHERE file_id IN ('
					sql += ','.join(['?'] * len(file_ids_order)) + ');'
					cursor.execute(sql, list(file_ids_order))
					tag_rows = cursor.fetchall()
				set_ids = []
				sets = {}
				self.log('total tags being checked: ' + str(len(tag_rows)), True)
				for (file_id, tag) in tag_rows:
					file_records[file_id]['tags'].append(tag)
					self.check_for_set_tag(file_id, tag, set_ids, sets)
				# sets
				self.get_sets_file_records(cursor, file_records, file_ids_order, set_ids, sets)
				self.populate_file_record_sets(file_records, set_ids, sets)
				cover_file_ids = []
				for file_id in file_ids_order:
					file_record = file_records[file_id]
					for tag in file_record['tags']:
						if 'cover:' == tag[:6]:
							cover_file_id = tag[6:]
							cover_file_ids.append(cover_file_id)
							file_records[file_id]['cover_file_id'] = cover_file_id
				# covers
				self.get_covers_file_records(cursor, file_records, file_ids_order, cover_file_ids)
				connection.commit()
		except Exception as e:
			self.log(str(e))
			raise ValueError('Problem getting file records')
		return (total_results, total_pages, total_size, perpage, file_ids_order, file_records, results_this_page)

	def remove_file_records(self, file_ids):
		"""Delete the files rows and the tags rows of the given file ids.

		Nothing is deleted when file_ids is empty.

		Raises:
			ValueError: if the database operation fails
		"""
		self.log('removing file records', True)
		import sqlite3
		try:
			with sqlite3.connect(self.config['database_file_path']) as connection:
				cursor = connection.cursor()
				targets = [file_id for file_id in file_ids]
				if targets:
					placeholders = ','.join('?' for _ in targets)
					# same id list for both tables
					for table in ('files', 'tags'):
						cursor.execute('DELETE from ' + table + ' WHERE file_id IN (' + placeholders + ');', targets)
				connection.commit()
		except Exception as error:
			self.log(str(error))
			raise ValueError('Problem removing file records')

	#TODO needs to parse multipart nudge (e.g. 1d12h54m10s)
	@staticmethod
	def parse_datetime_string(datetime_string=''):
		import time
		time_adjustment_direction = ''
		time_adjustment = 0
		new_time = 0
		if '' == datetime_string:
			new_time = int(time.time())
		elif datetime_string[0] in ['+', '-']:
			unit = datetime_string[-1:]
			time_adjustment = int(datetime_string[1:-1])

			if 'm' == unit:
				time_adjustment = time_adjustment * 60
			elif 'h' == unit:
				time_adjustment = time_adjustment * 60 * 60
			elif 'd' == unit:
				time_adjustment = time_adjustment * 60 * 60 * 24
			elif 'w' == unit:
				time_adjustment = time_adjustment * 60 * 60 * 24 * 7
			elif 'y' == unit:
				time_adjustment = time_adjustment * 60 * 60 * 24 * 365

			if '-' == datetime_string[0]:
				time_adjustment_direction = '-'
			else:
				time_adjustment_direction = '+'
		else:
			new_time = string_to_timestamp(datetime_string)
		return (new_time, time_adjustment_direction, time_adjustment)

	def set_publish_time(self, file_ids, datetime_string=''):
		"""Set or shift the publish time of the given files.

		Parameters:
			file_ids: ids of the files to update; nothing happens when empty
			datetime_string: passed to parse_datetime_string. When that reports
				a '+' or '-' direction, each file's publish_time is shifted by
				the returned amount; otherwise every file is set to the returned
				timestamp.

		Raises:
			ValueError: if the database update fails
		"""
		self.log('setting publish time', True)
		import sqlite3

		if 0 == len(file_ids):
			self.log('Tried to set publish time without providing any file IDs')
			return

		(new_time, time_adjustment_direction, time_adjustment) = self.parse_datetime_string(datetime_string)

		try:
			with sqlite3.connect(self.config['database_file_path']) as connection:
				cursor = connection.cursor()
				if time_adjustment_direction in ['+', '-']:
					# relative nudge: only the operator (one of two fixed characters)
					# goes into the SQL text, the amount is bound as a parameter
					sql = 'UPDATE files SET publish_time = publish_time ' + time_adjustment_direction + ' ? WHERE file_id IN ('
					values = [int(time_adjustment)]
				else:
					sql = 'UPDATE files SET publish_time = ? WHERE file_id IN ('
					values = [new_time]

				placeholders = []
				for file_id in file_ids:
					placeholders.append('?')
					values.append(file_id)
				cursor.execute(sql + ','.join(placeholders) + ');', values)
				connection.commit()
		except Exception as e:
			self.log(str(e))
			raise ValueError('Problem setting file publish times')

	# files
	def generate_thumbnail(self, file_record):
		"""Create the thumbnail, and for videos optionally a thumbnail clip, of one file.

		Files in the 'text' category are skipped. The still thumbnail is made
		with ffmpeg_create_thumbnail from the stored original. When
		config['generate_thumbnail_video_clips'] is set and the file's mimetype
		is a video mimetype, a .webm clip is made with
		ffmpeg_create_thumbnail_video_clip as well. Exceptions raised while
		creating either one are logged, not propagated.

		Returns:
			A list of error messages; empty when nothing failed or the file
			was skipped.
		"""
		self.log('generating thumbnail', True)
		# skip creating thumbnails for text files
		if 'text' == file_record['category']:
			return []
		source_path = os.path.join(self.config['files_directory_path'], 'original', file_record['file_id'] + '.' + file_record['extension'])

		problems = []

		made_thumbnail = False
		try:
			made_thumbnail = ffmpeg_create_thumbnail(
				source_path,
				os.path.join(self.config['files_directory_path'], 'thumbnail', file_record['file_id'] + '.' + self.config['thumbnail_format']),
				ffmpeg_path=self.config['ffmpeg_path'],
				thumbnail_edge=self.config['thumbnail_edge'],
				thumbnail_jpg_quality=self.config['thumbnail_jpg_quality'],
				thumbnail_webp_quality=self.config['thumbnail_webp_quality']
			)
		except Exception as error:
			self.log(str(error))
		if not made_thumbnail:
			self.log('Failed thumbnail creation for file ' + file_record['file_id'])
			problems.append('Failed thumbnail creation')

		# video clips are optional and only apply to video files
		wants_clip = self.config['generate_thumbnail_video_clips'] and file_record['mimetype'] in self.category_mimes['video']
		if not wants_clip:
			return problems

		made_clip = False
		try:
			made_clip = ffmpeg_create_thumbnail_video_clip(
				source_path,
				os.path.join(self.config['files_directory_path'], 'thumbnail', file_record['file_id'] + '.webm'),
				ffmpeg_path=self.config['ffmpeg_path'],
				file_duration_ms=file_record['duration'],
				thumbnail_edge=self.config['thumbnail_edge'],
				thumbnail_video_clip_webm_quality=self.config['thumbnail_video_clip_webm_quality'],
				thumbnail_video_clip_duration_ms=self.config['thumbnail_video_clip_duration_ms'],
			)
		except Exception as error:
			self.log(str(error))
		if not made_clip:
			self.log('Failed thumbnail video clip creation for file ' + file_record['file_id'])
			problems.append('Failed thumbnail video clip creation')
		return problems

	def generate_thumbnails(self, file_records):
		"""Generate thumbnails for several file records.

		Returns:
			An empty list when config['generate_thumbnails'] is off, otherwise
			one {'file_record': ..., 'errors': [...]} dict per record, where
			errors is whatever generate_thumbnail returned for it.
		"""
		self.log('generating thumbnails', True)
		if not self.config['generate_thumbnails']:
			return []
		return [
			{'file_record': record, 'errors': self.generate_thumbnail(record)}
			for record in file_records
		]

	def generate_summary(self, file_record):
		"""Create a summary image for one file when its original is large enough.

		Only files in the 'image' category are handled, and gifs with a
		non-zero duration are skipped. Nothing is created when the original is
		not larger than config['summary_filesize_threshold_bytes']. The summary
		is made with ffmpeg_create_thumbnail using the summary settings; an
		exception raised while creating it is logged, not propagated. A summary
		that turns out larger than the original is deleted again.

		Returns:
			A list of error messages; empty when nothing failed or the file
			was skipped.
		"""
		self.log('generating summary', True)
		if 'image' != file_record['category']:
			return []
		if 'image/gif' == file_record['mimetype'] and file_record['duration']:
			return []

		source_path = os.path.join(self.config['files_directory_path'], 'original', file_record['file_id'] + '.' + file_record['extension'])

		original_size = os.path.getsize(source_path)
		if not original_size > self.config['summary_filesize_threshold_bytes']:
			return []

		problems = []
		summary_path = os.path.join(self.config['files_directory_path'], 'summary', file_record['file_id'] + '.' + self.config['summary_format'])

		made_summary = False
		try:
			made_summary = ffmpeg_create_thumbnail(
				source_path,
				summary_path,
				ffmpeg_path=self.config['ffmpeg_path'],
				thumbnail_edge=self.config['summary_edge'],
				thumbnail_jpg_quality=self.config['summary_jpg_quality'],
				thumbnail_webp_quality=self.config['summary_webp_quality']
			)
		except Exception as error:
			self.log(str(error))

		if not made_summary:
			self.log('Failed summary creation for file ' + file_record['file_id'])
			problems.append('Failed summary creation')
			return problems

		# a summary that is bigger than the original defeats its purpose
		if original_size < os.path.getsize(summary_path):
			os.remove(summary_path)
			self.log('Created summary for file ' + file_record['file_id'] + ' was larger than original file')
			problems.append('Created summary was larger than original file')
		return problems

	def generate_summaries(self, file_records):
		"""Generate summaries for several file records.

		Returns:
			An empty list when config['generate_summaries'] is off, otherwise
			one {'file_record': ..., 'errors': [...]} dict per record, where
			errors is whatever generate_summary returned for it.
		"""
		self.log('generating summaries', True)
		if not self.config['generate_summaries']:
			return []
		return [
			{'file_record': record, 'errors': self.generate_summary(record)}
			for record in file_records
		]

	def store_files(self, source_file_paths, tags=[], datestring='', filenames=[]):
		"""Move files into storage and create their records, thumbnails, summaries and tags.

		For every source path the file is hashed with md5 (the url-safe base64
		digest without padding becomes the file id), its mimetype and extension
		are detected with puremagic, falling back to the mimetypes module, its
		dimensions and duration are probed with ffprobe where the mimetype
		calls for it, and the file is moved to
		<files_directory_path>/original/<file id>.<extension>.

		Parameters:
			source_file_paths: paths of the files to store
			tags: tags added to every stored file
			datestring: publish time, parsed by parse_datetime_string; a
				relative nudge is applied to the current time
			filenames: optional names matched to source_file_paths by position;
				each is stored as a 'filename:<name>' tag on its file

		Returns:
			A dict of file_id -> file record. A record has an 'errors' list when
			thumbnail or summary generation reported problems, or when
			create_file_records reported the file id as a collision.
		"""
		self.log('storing files', True)
		import base64
		import hashlib
		import mimetypes
		import shutil
		import time

		current_time = int(time.time())

		file_ids_to_filenames = {}

		(new_time, time_adjustment_direction, time_adjustment_seconds) = self.parse_datetime_string(datestring)

		if time_adjustment_direction in ['+', '-']:
			if '-' == time_adjustment_direction:
				time_adjustment_seconds = time_adjustment_seconds * -1
			new_time = current_time + time_adjustment_seconds

		file_records = {}
		# enumerate keeps filenames aligned by position even when a path is listed twice
		for (index, source_file_path) in enumerate(source_file_paths):
			# replace doubled backslashes and unify directory separators
			source_file_path = source_file_path.replace('\\\\', '\\').replace('\\', '/')

			# get file id from the md5 of the file contents
			hash_fn = hashlib.new('md5')
			with open(source_file_path, 'rb') as fh:
				# Read the file in chunks of 8192 bytes
				while chunk := fh.read(8192):
					hash_fn.update(chunk)
			file_id = base64.urlsafe_b64encode(hash_fn.digest()).decode('utf-8').strip('=')

			# get filename for this file
			if index < len(filenames):
				file_ids_to_filenames[file_id] = filenames[index]

			# get file mimetype and extension
			try:
				import puremagic
				puremagic_result = puremagic.magic_file(source_file_path)[0]
				(file_mimetype, file_extension) = (puremagic_result.mime_type, puremagic_result.extension.strip('.'))
			except Exception as e:
				self.log('Problem using puremagic to get mimetype of file ' + file_id)
				self.log(str(e))

				# guess_file_type is the path-based API of python 3.13 and later,
				# earlier versions use guess_type for both urls and paths;
				# both return a (type, encoding) tuple
				if hasattr(mimetypes, 'guess_file_type'):
					(file_mimetype, _) = mimetypes.guess_file_type(source_file_path)
				else:
					(file_mimetype, _) = mimetypes.guess_type(source_file_path)
				# NOTE(review): this still raises when the mimetype could not be
				# guessed at all; decide how unknown file types should be stored
				file_extension = mimetypes.guess_extension(file_mimetype).strip('.')

			if file_extension in ['jpeg', 'jpe']:
				file_extension = 'jpg'
			file_size = os.path.getsize(source_file_path)
			width = 0
			height = 0
			duration = 0
			# dimensions
			if file_mimetype in self.category_mimes['image'] or file_mimetype in self.category_mimes['video']:
				(width, height) = ffprobe_get_dimensions(source_file_path, self.config['ffprobe_path'])
			# duration
			if 'image/gif' == file_mimetype:
				duration = ffprobe_get_gif_duration_frames(source_file_path, self.config['ffprobe_path'])
			elif file_mimetype in self.category_mimes['video']:
				duration = ffprobe_get_video_duration_ms(source_file_path, self.config['ffprobe_path'])
			elif file_mimetype in self.category_mimes['audio']:
				duration = ffprobe_get_audio_duration_ms(source_file_path, self.config['ffprobe_path'])

			original_file_path = os.path.join(self.config['files_directory_path'], 'original', file_id + '.' + file_extension)
			try:
				shutil.move(source_file_path, original_file_path)
			except Exception as e:
				self.log('Problem moving file ' + file_id + ' to destination')
				self.log(str(e))
			file_record = {
				'file_id': file_id,
				'mimetype': file_mimetype,
				'category': self.get_file_category(file_mimetype),
				'extension': file_extension,
				'size': file_size,
				'width': width,
				'height': height,
				'duration': duration,
				'upload_time': current_time,
				'publish_time': new_time,
				'owner': self.provided_token,
				'tags': [],
				'thumbnail': '',
				'thumbnail_video_clip': '',
				'cover_file_id': '',
				'sets': {},
			}

			# collect thumbnail and summary problems together so that one
			# kind does not replace the other
			errors = list(self.generate_thumbnail(file_record))
			errors.extend(self.generate_summary(file_record))
			if errors:
				file_record['errors'] = errors

			file_records[file_id] = file_record

		collision_file_ids = self.create_file_records(file_records.values())
		for collision_file_id in collision_file_ids:
			if collision_file_id in file_records:
				file_records[collision_file_id]['errors'] = ['File already exists']
		if tags:
			self.add_files_tags(file_records.keys(), tags)

		for (file_id, filename) in file_ids_to_filenames.items():
			self.add_files_tags([file_id], ['filename:' + filename])

		for file_id in file_records:
			self.populate_file_record_summaries(file_records[file_id])
			self.populate_file_record_thumbnails(file_records[file_id])

		return file_records

	def remove_files(self, file_ids):
		"""Remove the stored originals, thumbnails, summaries, records and tags of the given files.

		The extension of each original is taken from the file's record. Ids
		without a record are logged and their original is skipped, while their
		thumbnails, summaries, records and tags are still cleaned up. Nothing is
		done when file_ids is empty.
		"""
		self.log('removing files', True)
		if not file_ids:
			# an empty id list would make the search below match every file
			return
		(_, _, _, _, _, file_records, _) = self.search_file_records(file_ids=file_ids)
		for file_id in file_ids:
			if file_id not in file_records:
				# without a record the extension, and so the path, is unknown
				self.log('No file record found for file ' + str(file_id) + ', skipping original')
				continue
			# original
			path = os.path.join(
				self.config['files_directory_path'],
				'original',
				file_id + '.' + file_records[file_id]['extension']
			)
			if os.path.isfile(path):
				os.remove(path)
		self.remove_file_thumbnails(file_ids)
		self.remove_file_summaries(file_ids)
		self.remove_file_records(file_ids)
		self.remove_files_tags(file_ids=file_ids)

	def remove_file_thumbnails(self, file_ids):
		"""Delete the thumbnail files of the given file ids, where they exist.

		For each id the .jpg and .webp thumbnails and the .webm thumbnail video
		clip in <files_directory_path>/thumbnail are removed.
		"""
		self.log('removing file thumbnails', True)
		# thumbnails first (.jpg, .webp), then thumbnail video clips (.webm)
		suffixes = ('.jpg', '.webp', '.webm')
		for file_id in file_ids:
			candidates = [
				os.path.join(self.config['files_directory_path'], 'thumbnail', file_id + suffix)
				for suffix in suffixes
			]
			for candidate in candidates:
				if os.path.isfile(candidate):
					os.remove(candidate)

	def remove_file_summaries(self, file_ids):
		"""Delete the summary files of the given file ids, where they exist.

		For each id the .jpg and .webp summaries in
		<files_directory_path>/summary are removed.
		"""
		self.log('removing file summaries', True)
		suffixes = ('.jpg', '.webp')
		for file_id in file_ids:
			candidates = [
				os.path.join(self.config['files_directory_path'], 'summary', file_id + suffix)
				for suffix in suffixes
			]
			for candidate in candidates:
				if os.path.isfile(candidate):
					os.remove(candidate)

	def rebuild_files(self, file_ids):
		"""Regenerate the thumbnails and summaries of the given files.

		The existing thumbnails and summaries are removed first. The files are
		then looked up with search_file_records, and thumbnails and summaries
		are generated for every requested id that has a record, in the order
		the ids were given.
		"""
		self.log('rebuilding files', True)
		self.remove_file_thumbnails(file_ids)
		self.remove_file_summaries(file_ids)
		search_result = self.search_file_records(file_ids)
		(_, _, _, _, _, found_records, _) = search_result
		# ids without a record are silently left out
		records_to_rebuild = [found_records[file_id] for file_id in file_ids if file_id in found_records]
		self.generate_thumbnails(records_to_rebuild)
		self.generate_summaries(records_to_rebuild)

	#TODO
	def cleanup_orphan_thumbnails(self):
		"""Placeholder for removing thumbnails without a file record; currently does nothing."""
		#TODO get all file_ids from filenames in files_directory_path thumbnail/
		#TODO search for file_ids
		#TODO remove files and thumbnails where no record was found for that file_id
		return None

	#TODO
	def cleanup_orphan_files(self):
		"""Placeholder for removing stored originals without a file record; currently does nothing."""
		#TODO get all file_ids from filenames in files_directory_path original/
		#TODO search for file_ids
		#TODO remove files and thumbnails where no record was found for that file_id
		return None
