2021-07-14 23:55:45 +02:00
#! /usr/bin/env python3
#takes a directory, converts all .adoc files to html files, copying the resulting html files to an identical directory structure, and copies over all non .adoc files unchanged. Optionally outputs as a tar.gz file.
2021-09-07 18:54:14 +02:00
import subprocess , sys , argparse , logging , tempfile , shutil , os , glob
2021-07-14 23:55:45 +02:00
from pathlib import Path
2021-09-05 07:07:48 +02:00
# Configure the root logger once at import time: timestamped messages, INFO level by default.
# parse_arguments() raises the root logger to DEBUG when the verbose flag is given.
logging . basicConfig ( format = ' %(asctime)s : %(message)s ' , level = logging . INFO )
2021-07-14 23:55:45 +02:00
def parse_arguments():
    """Parse the command line and normalise it into the values the script uses.

    Returns:
        A tuple ``(inputDir, outFile, stylesheet, compress, exclude)``:

        * ``inputDir`` -- resolved path of the directory to convert.
        * ``outFile`` -- resolved path of the directory or ``.tar.gz`` to create.
        * ``stylesheet`` -- resolved path of the custom CSS file, or ``None``.
        * ``compress`` -- ``True`` when the output should be a ``.tar.gz``.
        * ``exclude`` -- list of glob patterns from ``--exclude-file`` followed
          by those from ``--exclude``.

    Raises:
        SystemExit: on invalid arguments, or (with a non-zero status) when the
            input directory does not exist.
        FileExistsError: when the output path resolves to the input directory.
    """
    parser = argparse.ArgumentParser(description='create a website directory structure by converting .adoc files in a directory strucutre to .html files.')
    parser.add_argument('inputDir', type=Path, help='The directory of adoc files to be copied and converted.')
    parser.add_argument('-o', '--output', type=Path, help='What to name the generated directory or tar file')
    parser.add_argument('--stylesheet', type=Path, help='A custom CSS file to be applied to the output.')
    parser.add_argument('--exclude-file', type=Path, help='A text file containing glob patterns to exclude, 1 per line.')
    parser.add_argument('--exclude', nargs='+', help='A list of glob patterns to ignore. Remember to quote them so your shell doesnt escape them!')
    parser.add_argument('-z', '--compress', action='store_true', help='whether to compress the resulting directory to a tar.gz file. can be usefull for scripting to transfer the site to a remote server.')
    parser.add_argument('-v', '--verbose', action='store_true', help='outputs debug messages onto the console.')
    args = parser.parse_args()

    #setting log level
    if args.verbose:
        logging.info('setting log level to verbose')
        logging.getLogger().setLevel(level=logging.DEBUG)

    #fail fast, and with a non-zero exit status, when there is nothing to convert.
    inputDir = args.inputDir.resolve()
    if not inputDir.exists():
        sys.exit(f'Inputdir {inputDir} does not exist!')

    #set compress flag: either requested explicitly, or implied by a .tar.gz output name.
    #the name is tested with endswith() so that dotted names such as "my.site.tar.gz" are recognised too.
    archiveSuffix = '.tar.gz'
    compress = args.compress or (
        args.output is not None and args.output.name.endswith(archiveSuffix)
    )

    #If outfile was not set, set it.
    if args.output is None:
        #use the resolved directory name: Path('.').name is empty, which made with_name() raise.
        outFile = Path.cwd() / (inputDir.name + '_compiled')
    else:
        outFile = args.output.resolve()

    #add .tar.gz if compress is set and the outfile does not already have it.
    if compress and not outFile.name.endswith(archiveSuffix):
        logging.info(f'outFile was {outFile}, corrected because compress flag is set.')
        outFile = outFile.with_suffix(archiveSuffix).resolve()

    if inputDir == outFile.resolve():
        raise FileExistsError('output file cannot have the same path as the input file!')

    logging.debug(f'inputing from {inputDir}')
    logging.info(f'outputting to {outFile.resolve()}')
    logging.debug(f'compress is {compress}')

    exclude = []
    if args.exclude_file is not None:
        with open(args.exclude_file, 'r') as file:
            stripped = (line.strip() for line in file)
            #blank lines are not patterns; drop them.
            exclude = [pattern for pattern in stripped if pattern]
    if args.exclude is not None:
        exclude.extend(args.exclude)

    stylesheet = None
    if args.stylesheet is not None:
        stylesheet = args.stylesheet.resolve()
        logging.info(f'using stylesheet {stylesheet}')

    return inputDir, outFile, stylesheet, compress, exclude
2021-07-16 00:00:39 +02:00
2021-08-15 17:10:25 +02:00
#Doing it in a tmpDir first, as some distributions put temp files on a ramdisk. This should speed up the operation significantly.
2021-07-16 00:00:39 +02:00
class TmpDir:
    """A staging copy of a source tree inside a temporary directory.

    On construction the source directory is copied into a fresh temporary
    directory, leaving out ``.adoc`` sources, git metadata and any
    caller-supplied glob patterns. The staged tree can then be copied, or
    archived, to its final destination.

    Attributes:
        tmpDir: the underlying ``tempfile.TemporaryDirectory``.
        path: string path of the staged copy (temp dir + source dir name).
        ignorePatterns: all glob patterns left out of the copy.
        ignorePattern: the ``shutil.ignore_patterns`` callable built from them.

    Instances may be used as context managers; leaving the ``with`` block
    calls ``cleanup()``.
    """

    def __init__(self, srcDir, exclude):
        """Stage ``srcDir`` in a new temp dir, skipping names matching ``exclude``."""
        self.tmpDir = tempfile.TemporaryDirectory()
        logging.debug(f'making tmp file from {srcDir} at {self.tmpDir.name}')
        self.path = str(Path(self.tmpDir.name) / Path(srcDir).resolve().name)
        #patterns are matched against bare entry names, never against paths,
        #so the git directory has to be excluded as '.git' ('.git/*' can never match).
        self.ignorePatterns = ['*.adoc', '.gitignore', '.git']
        self.ignorePatterns.extend(exclude)
        self.ignorePattern = shutil.ignore_patterns(*self.ignorePatterns)
        shutil.copytree(srcDir, self.path, ignore=self.ignorePattern, symlinks=False)

    def __enter__(self):
        return self

    def __exit__(self, excType, excValue, traceback):
        self.cleanup()

    #copy out from tmpDir (which may be in RAM, depending on distribution) to disk
    def copy_self_to(self, destPath):
        """Copy the staged tree to ``destPath``, which must not exist yet."""
        logging.debug(f'outputting to {Path(destPath).resolve()}')
        shutil.copytree(self.path, destPath, symlinks=False)

    #copy out from tmpDir (which may be in RAM, depending on distribution) to a compressed file on disk
    def compress_and_copy_self_to(self, destPath):
        """Write the staged tree to a gzip-compressed tar archive.

        ``destPath`` names the archive. A trailing ``.tar.gz`` is removed
        before the path is handed to ``shutil.make_archive`` (which appends the
        extension itself); any other dots in the name are preserved.
        """
        destPath = Path(destPath).resolve()
        archiveSuffix = '.tar.gz'
        #strip only the archive extension; splitting on the first dot would
        #turn "my.site.tar.gz" into "my".
        if destPath.name.endswith(archiveSuffix) and destPath.name != archiveSuffix:
            destPath = destPath.with_name(destPath.name[:-len(archiveSuffix)])
        logging.debug(f'compressing to {destPath} from {Path(self.path).parent}')
        shutil.make_archive(str(destPath), 'gztar', Path(self.path).parent)

    def cleanup(self):
        """Delete the temporary directory and everything staged in it."""
        self.tmpDir.cleanup()
2021-07-16 00:00:39 +02:00
2021-09-07 18:54:14 +02:00
#works on the current working directory
def find_paths_to_convert(fileNameGlob):
    """Return the paths under the current working directory, at any depth, whose file name matches ``fileNameGlob``.

    The returned paths are strings relative to the current working directory.
    """
    recursivePattern = '**/' + fileNameGlob
    return list(glob.iglob(recursivePattern, recursive=True))
2021-09-02 00:42:19 +02:00
2022-01-10 07:45:51 +01:00
#finds the depth of a file relative to given directory
def find_relative_file_depth(subfile, parentDir):
    """Return how many directory levels lie between ``parentDir`` and ``subfile``.

    Both paths are resolved first. A file directly inside ``parentDir`` has
    depth 0, a file one sub-directory down has depth 1, and so on.
    """
    fileParts = Path(subfile).resolve().parts
    rootParts = Path(parentDir).resolve().parts
    #the file's own name is one of its parts, hence the extra one on the parent's side.
    return len(fileParts) - (len(rootParts) + 1)
2021-09-04 19:58:10 +02:00
#simple wrapper around the asciidoctor cli.
2022-01-13 22:54:00 +01:00
def convert_file(inDir: Path, outDir: Path, inFile: Path, stylesheet: Path) -> bool:
    """Convert one .adoc file to html by running the asciidoctor cli.

    Args:
        inDir: root of the source tree; passed to asciidoctor as --source-dir.
        outDir: root of the output tree; passed as --destination-dir, so the
            html file lands at the same relative location as ``inFile``.
        inFile: the .adoc file to convert.
        stylesheet: custom CSS file to use, or ``None`` for asciidoctor's own.

    Returns:
        ``True`` when asciidoctor ran and exited successfully, ``False`` when
        it could not be started or reported an error (the failure is logged and
        not re-raised, so the remaining files can still be converted).
    """
    #in order for the stylesdir and imagesdir to be linked to correctly, we need to know the relative depth between the two directories.
    depth = find_relative_file_depth(inFile, inDir)
    logging.info(f'converting {Path(inFile).resolve()}')
    logging.debug(f'converting {inFile=}, {outDir=}, {inDir=}, {stylesheet=}')
    depthstring = '../' * depth

    if stylesheet is not None:
        cssAttributes = [
            f'--attribute=stylesheet={stylesheet.name}',
            f'--attribute=copycss={stylesheet}',
        ]
    else:
        cssAttributes = ['--attribute=copycss']

    arguments = [
        'asciidoctor',
        *cssAttributes,
        #makes the stylesheet linked, but still includes it in the output.
        '--attribute=linkcss',
        f'--attribute=stylesdir={depthstring}css',
        #set imagesdir
        f'--attribute=imagesdir={depthstring}images',
        #specifies the source directory root.
        f'--source-dir={inDir}',
        #Destination dir. It takes the file from the subtree --source-dir and puts it in the equivalent location in the subtree --destination-dir. (talking about filesystem subtrees).
        f'--destination-dir={outDir}',
        inFile,
    ]
    logging.debug(f'{arguments=}')

    try:
        #the destdir can be used instead of destfile in order to preserve the directory structure relative to the base dir. really useful.
        subprocess.run(arguments, check=True)
    except (subprocess.SubprocessError, OSError) as e:
        #SubprocessError: asciidoctor exited non-zero; OSError: it could not be launched at all.
        logging.error(f'could not convert {inFile}!')
        logging.error(f'{e}')
        return False
    return True
2021-09-04 19:58:10 +02:00
2021-09-05 18:02:51 +02:00
def _is_excluded(path, patterns):
    """Return True when any component of ``path`` matches any glob in ``patterns``.

    This mirrors how the exclude patterns are applied when the tree is copied:
    they are tested against individual directory/file names, and an excluded
    directory hides everything below it.
    """
    import fnmatch
    return any(
        fnmatch.fnmatch(part, pattern)
        for part in Path(path).parts
        for pattern in patterns
    )


def main():
    """Stage the input directory, convert every .adoc file in it, then write the result out."""
    inFile, outFile, stylesheet, compress, exclude = parse_arguments()
    #all later paths ('./', the glob results) are relative to the input directory.
    os.chdir(inFile)
    tmpDir = TmpDir('./', exclude)
    try:
        for i in find_paths_to_convert('*.adoc'):
            #excluded files must not reach the output through the conversion step either.
            if _is_excluded(i, exclude):
                logging.debug(f'skipping excluded file {i}')
                continue
            convert_file(inDir='./', outDir=tmpDir.path, inFile=i, stylesheet=stylesheet)

        if compress:
            tmpDir.compress_and_copy_self_to(outFile)
        else:
            tmpDir.copy_self_to(outFile)
    finally:
        #remove the staging directory even when conversion or copying fails.
        tmpDir.cleanup()


if __name__ == '__main__':
    main()