aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJeffrey Armstrong <jeff@approximatrix.com>2022-02-25 17:22:43 -0500
committerJeffrey Armstrong <jeff@approximatrix.com>2022-02-25 17:22:43 -0500
commit452ceb636e37fab2927c688e7b495841879ea29a (patch)
treedee91a09e774c998f6b978f1ef4fa780e18029b1
downloadfpoint-452ceb636e37fab2927c688e7b495841879ea29a.tar.gz
fpoint-452ceb636e37fab2927c688e7b495841879ea29a.zip
Initial code commit
-rw-r--r--LICENSE21
-rw-r--r--README.md23
-rw-r--r--driver.f90133
-rw-r--r--fpoint.prj78
-rw-r--r--fsutil.F9088
-rw-r--r--pptxml.f90664
-rw-r--r--pptxzip.f90204
-rw-r--r--winsupport.c30
8 files changed, 1241 insertions, 0 deletions
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..fd3b407
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,21 @@
+Copyright (c) 2022 Approximatrix, LLC <support@approximatrix.com>
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+The Software shall be used for Good, not Evil.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..63aa50c
--- /dev/null
+++ b/README.md
@@ -0,0 +1,23 @@
+FPoint - PowerPoint to Text Converter
+=====================================
+
+This program is a crude attempt to extract text from PowerPoint
+presentations files (*.pptx only, no legacy) written almost entirely in
+Fortran. The code attempts to decipher what is meant by each slide's
+structure in a vague manner. The text generated, at this time, is
+loosely based on gemtext, generally considered a subset of Markdown.
+
+Please see LICENSE for the license terms.
+
+Requirements
+------------
+
+This program requires the following:
+
+ Fortran XML (or FoX)
+ https://github.com/andreww/fox
+
+ libzip-wrapper
+ https://git.approximatrix.com/cgit.cgi/libzip-wrapper/
+
+ libzip (indirectly due to the above)
diff --git a/driver.f90 b/driver.f90
new file mode 100644
index 0000000..eae0b34
--- /dev/null
+++ b/driver.f90
@@ -0,0 +1,133 @@
+! Copyright (c) 2022 Approximatrix, LLC <support@approximatrix.com>
+!
+! Permission is hereby granted, free of charge, to any person obtaining a copy
+! of this software and associated documentation files (the "Software"), to deal
+! in the Software without restriction, including without limitation the rights
+! to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+! copies of the Software, and to permit persons to whom the Software is
+! furnished to do so, subject to the following conditions:
+!
+! The above copyright notice and this permission notice shall be included in
+! all copies or substantial portions of the Software.
+!
+! The Software shall be used for Good, not Evil.
+!
+! THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+! IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+! FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+! AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+! LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+! OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+! SOFTWARE.
+
+program driver
+use fpoint_pptxzip
+implicit none
+
+ type(pptxtracted)::presentation
+ !character(len=*), parameter::filename = "Welcome to PowerPoint.pptx" !"samplepptx.pptx"
+ logical::verbose
+ logical::notes
+ integer::i, j
+ character(len=:), pointer::filename, arg
+
+ if(command_argument_count() < 1) then
+ call usage()
+ call exit(0)
+ end if
+
+ verbose = .false.
+ notes = .false.
+ filename => null()
+ do i = 1, command_argument_count()
+
+ call get_command_argument(i, length=j)
+ allocate(character(len=j)::arg)
+ call get_command_argument(i, value=arg)
+
+ if(arg == "-n") then
+ notes = .true.
+
+ else if(arg == "-v") then
+ verbose = .true.
+
+ else
+ filename => arg
+ arg => null()
+
+ end if
+
+ if(associated(arg)) then
+ deallocate(arg)
+ arg => null()
+ end if
+
+ end do
+
+ if(verbose) then
+ call title()
+ end if
+ call maybe_print(verbose, "Loading "//filename)
+
+ if(presentation%open(filename)) then
+
+ call maybe_print(verbose, "File opened at "//trim(presentation%directory))
+
+ call presentation%parse()
+
+ write(*, '(A)') presentation%to_text()
+
+ call presentation%close()
+
+ else
+ call maybe_print(verbose, "Failed on "//filename)
+ call exit(1)
+ end if
+
+ call exit(0)
+
+contains
+
+ subroutine title()
+ implicit none
+
+ Print *, "FPoint - PowerPoint to Text Converter"
+ Print *, "Copyright 2022 Approximatrix, LLC"
+
+ end subroutine title
+
+ subroutine usage()
+ implicit none
+
+ character(512)::prg
+
+ call get_command_argument(0, prg)
+
+ call title()
+
+ Print *, " "
+ Print *, "Usage: "//trim(prg)//" [options] <input filename>"
+ Print *, " "
+ Print *, "Options:"
+ Print *, " "
+ Print *, " -n Extract notes instead of slide contents"
+ Print *, " -v Be somewhat verbose, probably not enough to be interesting"
+ Print *, " "
+ Print *, "Output is printed to standard out"
+ Print *, " "
+
+ end subroutine usage
+
+ subroutine maybe_print(v, str)
+ implicit none
+
+ logical, intent(in)::v
+ character(len=*), intent(in)::str
+
+ if(v) then
+ Print *, str
+ end if
+ end subroutine maybe_print
+
+end program driver
+ \ No newline at end of file
diff --git a/fpoint.prj b/fpoint.prj
new file mode 100644
index 0000000..5d66372
--- /dev/null
+++ b/fpoint.prj
@@ -0,0 +1,78 @@
+{
+ "Root":{
+ "Folders":[],
+ "Name":"+fpoint (fpoint.exe)",
+ "Files":[{
+ "filename":".\\driver.f90",
+ "enabled":"1"
+ },{
+ "filename":".\\fsutil.F90",
+ "enabled":"1"
+ },{
+ "filename":".\\pptxml.f90",
+ "enabled":"1"
+ },{
+ "filename":".\\pptxzip.f90",
+ "enabled":"1"
+ },{
+ "filename":".\\winsupport.c",
+ "enabled":"1"
+ }]
+ },
+ "Name":"fpoint (fpoint.exe)",
+ "Options":{
+ "Compiler Options":{
+ "Fortran Flags":"-D__WIN32__",
+ "Link Flags":"-lFoX_dom -lFoX_sax -lFoX_utils -lFoX_common -lFoX_fsys -lfzip.dll -luser32 -lkernel32",
+ "C Flags":""
+ },
+ "Architecture":0,
+ "Type":0,
+ "Revision":2,
+ "Windows GUI":0,
+ "File Options":{
+ "Library Directories":["Default Add-On Directory","../../Workspace/git/libzip-wrapper"],
+ "Build Directory":"build",
+ "Module Directory":"modules",
+ "Include Directories":["Default Add-On Include Directory","../../Workspace/git/libzip-wrapper/modules"]
+ },
+ "Target":"fpoint.exe",
+ "Fortran Options":{
+ "Use C Preprocessor":"false",
+ "Runtime Diagnostics":"false",
+ "Floating Point Exception Trap":0,
+ "Cray Pointers":"false",
+ "Enable Coarrays":"false",
+ "Enable OpenMP":"false",
+ "Initialize Variables to Zero":"false",
+ "Default Double for Real":"false"
+ },
+ "Code Generation Options":{
+ "CPU Specific":"false",
+ "Processor":"generic",
+ "Aggressive Loops":"false",
+ "Debugging":"true",
+ "Optimization Mode":0,
+ "Profiling":"false"
+ },
+ "Build Dependencies":1,
+ "Launch Options":{
+ "Working Directory":"",
+ "Launch Using MPI":"false",
+ "Keep Console":"true",
+ "External Console":"true",
+ "Command Line Arguments":"",
+ "Build Before Launch":"true"
+ },
+ "Build Options":{
+ "Auto Management":"true",
+ "Makefile":"Makefile.fpoint",
+ "Auto Makefile":"true"
+ },
+ "Linker Options":{
+ "Static Linking Mode":7,
+ "Link MPI Library":"false",
+ "Link LAPACK":0
+ }
+ }
+} \ No newline at end of file
diff --git a/fsutil.F90 b/fsutil.F90
new file mode 100644
index 0000000..3dcf20c
--- /dev/null
+++ b/fsutil.F90
@@ -0,0 +1,88 @@
+! Copyright (c) 2022 Approximatrix, LLC <support@approximatrix.com>
+!
+! Permission is hereby granted, free of charge, to any person obtaining a copy
+! of this software and associated documentation files (the "Software"), to deal
+! in the Software without restriction, including without limitation the rights
+! to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+! copies of the Software, and to permit persons to whom the Software is
+! furnished to do so, subject to the following conditions:
+!
+! The above copyright notice and this permission notice shall be included in
+! all copies or substantial portions of the Software.
+!
+! The Software shall be used for Good, not Evil.
+!
+! THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+! IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+! FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+! AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+! LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+! OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+! SOFTWARE.
+
+module fpoint_fsutil
+implicit none
+
+#ifdef __WIN32__
+ character, parameter::pathsep = '\'
+#else
+ character, parameter::pathsep = '/'
+#endif
+
+contains
+
+ subroutine append_to_path(path, additive)
+ implicit none
+
+ character(len=*), intent(inout)::path
+ character(len=*), intent(in)::additive
+ integer::istart
+
+ if(path(len_trim(path):len_trim(path)) /= pathsep) then
+ path = trim(path)//pathsep
+ end if
+
+ istart = 1
+ if(additive(1:1) == pathsep) then
+ istart = 2
+ end if
+
+ path = trim(path)//additive(istart:len_trim(additive))
+
+ end subroutine append_to_path
+
+ subroutine get_temporary_directory(dir)
+ implicit none
+
+ character(len=*), intent(out)::dir
+
+ call get_environment_variable("TEMP", dir)
+
+ end subroutine get_temporary_directory
+
+ subroutine make_directory(dir)
+ use iso_c_binding
+ implicit none
+
+ character(len=*), intent(in)::dir
+
+#ifdef __WIN32__
+ character(len=:, kind=c_char), pointer::cdir
+ interface
+ subroutine make_dir(str) bind(c, name="make_directory_on_windows")
+ use iso_c_binding
+ type(c_ptr), value::str
+ end subroutine make_dir
+ end interface
+
+ allocate(character(len=(len_trim(dir)+1))::cdir)
+ cdir = trim(dir)//c_null_char
+ call make_dir(c_loc(cdir))
+ deallocate(cdir)
+#else
+ call execute_command_line('mkdir "'//trim(dir)//'"', wait=.true.)
+#endif
+
+ end subroutine make_directory
+
+end module fpoint_fsutil \ No newline at end of file
diff --git a/pptxml.f90 b/pptxml.f90
new file mode 100644
index 0000000..c785bb8
--- /dev/null
+++ b/pptxml.f90
@@ -0,0 +1,664 @@
+! Copyright (c) 2022 Approximatrix, LLC <support@approximatrix.com>
+!
+! Permission is hereby granted, free of charge, to any person obtaining a copy
+! of this software and associated documentation files (the "Software"), to deal
+! in the Software without restriction, including without limitation the rights
+! to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+! copies of the Software, and to permit persons to whom the Software is
+! furnished to do so, subject to the following conditions:
+!
+! The above copyright notice and this permission notice shall be included in
+! all copies or substantial portions of the Software.
+!
+! The Software shall be used for Good, not Evil.
+!
+! THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+! IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+! FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+! AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+! LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+! OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+! SOFTWARE.
+
+module fpoint_pptxml
+use FoX_dom
+implicit none
+
+ private
+ public::slide
+
+ integer, parameter::list_single_bullet_max_length = 512
+
+ integer, parameter::SLIDENUM_UNKNOWN = 0
+ integer, parameter::SLIDENUM_DOESNOTEXIST = -1
+ integer, parameter::SLIDENUM_ERRORPARSING = -2
+
+ character(*), parameter::presentation_schema = "http://schemas.openxmlformats.org/presentationml/2006/main"
+ character(*), parameter::drawing_schema = "http://schemas.openxmlformats.org/drawingml/2006/main"
+
+ type::slide
+
+ integer::number
+
+ character(len=:), pointer::title
+
+ type(Node), pointer::slide_dom
+ class(element), pointer::content
+
+ contains
+
+ procedure :: load => slide_load_filename
+ procedure :: to_text => slide_to_text
+ procedure :: text_length => slide_text_length
+
+ end type slide
+
+ type, abstract::element
+ class(element), pointer :: next
+ contains
+ procedure(from_node), deferred :: from_node
+ procedure(to_text), deferred :: to_text
+ procedure :: length => element_text_length
+ end type element
+
+ abstract interface
+ function to_text(self)
+ import::element
+ class(element), intent(in)::self
+ character(len=:), pointer::to_text
+ end function to_text
+ end interface
+
+ abstract interface
+ subroutine from_node(self, n)
+ use FoX_dom
+ import::element
+ class(element), intent(inout)::self
+ type(Node), pointer::n
+ end subroutine from_node
+ end interface
+
+ type, extends(element)::text_element
+ character(len=:), pointer::text
+ contains
+ procedure::to_text => text_element_to_text
+ procedure::from_node => text_element_from_node
+ end type text_element
+
+ type, extends(text_element)::heading_element
+ contains
+ procedure::to_text => heading_element_to_text
+ end type heading_element
+
+ type, extends(element)::list_element
+ character(len=list_single_bullet_max_length), dimension(:), pointer::text
+ contains
+ procedure::to_text => list_element_to_text
+ procedure::from_node => list_element_from_node
+ end type list_element
+
+contains
+
+ function get_child_text(n) result(t)
+ use FoX_dom
+ implicit none
+
+ type(Node), pointer::n, tn
+ character(len=:), pointer::t, resize_t
+ type(NodeList), pointer::text_nodes
+
+ text_nodes => getElementsByTagNameNS(n, drawing_schema, "t")
+ if(getLength(text_nodes) >= 1) then
+ tn => item(text_nodes, 0)
+ allocate(character(len=1024) :: resize_t)
+ call extractDataContent(tn, resize_t)
+ allocate(character(len=len_trim(resize_t)) :: t)
+ t = resize_t
+ deallocate(resize_t)
+ end if
+
+ end function get_child_text
+
+ function find_slide_title(dom) result(t)
+ use FoX_dom
+ implicit none
+
+ type(Node), pointer::dom
+ character(len=:), pointer::t
+
+ type(NodeList), pointer::nonvis_props
+ type(Node), pointer::one_props, sp, nonvis_sp
+ character(len=6)::temp
+ integer::i
+
+ t => null()
+ nonvis_props => getElementsByTagNameNS(dom, presentation_schema, "cNvPr")
+
+ do i=1, getLength(nonvis_props)
+ one_props => item(nonvis_props, i-1)
+ if(hasAttribute(one_props, "name")) then
+ temp = getAttribute(one_props, "name")
+ if(temp(1:6) == "Title ") then
+ exit
+ end if
+ end if
+ one_props => null()
+ end do
+
+ if(associated(one_props)) then
+ nonvis_sp => getParentNode(one_props)
+ sp => getParentNode(nonvis_sp)
+ if(associated(sp)) then
+ t => concatenated_text_in_rows(getElementsByTagNameNS(sp, drawing_schema, "r"))
+ end if
+ end if
+
+ end function find_slide_title
+
+ function parse_one_sp_node(n) result(e)
+ use FoX_dom
+ implicit none
+
+ type(Node), pointer::n
+ class(element), pointer::e
+
+ type(heading_element), pointer::he
+ type(text_element), pointer::te
+ type(list_element), pointer::le
+
+ type(NodeList), pointer::nonvis_sp, bullet_defns
+ type(Node), pointer::prop_node
+
+ integer::i
+
+ e => null()
+
+ nonvis_sp => getElementsByTagNameNS(n, presentation_schema, "cNvPr")
+ do i=1, getLength(nonvis_sp)
+ prop_node => item(nonvis_sp, i-1)
+
+ ! Try based on property name
+ if(hasAttribute(prop_node, "name")) then
+
+ ! if "Title" or "Subtitle" is present
+ if(index(getAttribute(prop_node, "name"), "itle") > 0) then
+ allocate(he)
+ call he%from_node(n)
+ e => he
+ exit
+ end if
+
+ end if
+
+ end do
+
+ ! Next, check for bullets
+ if(.not. associated(e)) then
+ bullet_defns => getElementsByTagNameNS(n, drawing_schema, "cNvPr")
+ if(associated(bullet_defns)) then
+ if(getLength(bullet_defns) > 0) then
+ allocate(le)
+ call le%from_node(n)
+ e => le
+ end if
+ end if
+ end if
+
+ if(.not. associated(e)) then
+ bullet_defns => getElementsByTagNameNS(n, drawing_schema, "lstStyle")
+ if(associated(bullet_defns)) then
+ if(getLength(bullet_defns) > 0) then
+ allocate(le)
+ call le%from_node(n)
+ e => le
+ end if
+ end if
+ end if
+
+ ! Finally, try treating as a text node
+ if(.not. associated(e)) then
+ allocate(te)
+ call te%from_node(n)
+ e=>te
+ end if
+
+ if(associated(e)) then
+ if(e%length() == 0) then
+ deallocate(e)
+ e => null()
+ else
+ e%next => null()
+ end if
+ end if
+
+ end function parse_one_sp_node
+
+ recursive function walk_sp_tree(parent) result(elements)
+ use FoX_dom
+ implicit none
+
+ type(Node), pointer::parent
+ class(element), pointer::elements
+
+ type(Node), pointer::one_child
+ type(NodeList), pointer::child_nodes
+ class(element), pointer::child_elements, walker
+ integer::i
+ type(DomException)::fex
+ character(32)::tname
+
+ elements => null()
+
+ child_nodes => getChildNodes(parent)
+ do i = 1, getLength(child_nodes)
+ one_child => item(child_nodes, i-1)
+
+ child_elements => null()
+
+ tname = getTagName(one_child, fex)
+
+ if(.not. inException(fex)) then
+ ! Single sp
+ if(trim(tname) == "p:sp") then
+ child_elements => parse_one_sp_node(one_child)
+
+ ! A group of sp's
+ elseif(trim(tname) == "p:grpSp") then
+ child_elements => walk_sp_tree(one_child)
+
+ end if
+
+ if(associated(child_elements)) then
+
+ ! Append
+ if(associated(elements)) then
+
+ walker => elements
+ do while(associated(walker%next))
+ walker => walker%next
+ end do
+ walker%next => child_elements
+
+ ! First one!
+ else
+ elements => child_elements
+ end if
+
+ end if
+ end if
+
+ end do
+
+ end function walk_sp_tree
+
+ subroutine parse_slide_sp(self)
+ use FoX_dom
+ implicit none
+
+ class(slide), intent(inout)::self
+
+ type(NodeList), pointer::nl
+ type(Node), pointer::sptree
+
+ nl => getElementsByTagNameNS(self%slide_dom, &
+ presentation_schema, "spTree")
+
+ self%content => null()
+ if(associated(nl)) then
+ if(getLength(nl) >= 1) then
+ sptree => item(nl, 0)
+ self%content => walk_sp_tree(sptree)
+ end if
+ end if
+
+ end subroutine parse_slide_sp
+
+ subroutine slide_load_filename(self, filename)
+ use FoX_dom
+ implicit none
+
+ class(slide), intent(out)::self
+ character(len=*), intent(in)::filename
+ character(len=:), pointer::filename_no_backslashes
+
+ ! For parsing the title out
+ type(NodeList), pointer::sp_elements
+ type(Node), pointer::title_parent
+
+ integer::ext, startnum, ios
+ logical::existence
+
+ integer::i
+
+ class(element), pointer::walker, tmp
+
+ self%number = SLIDENUM_UNKNOWN
+
+ inquire(file=filename, exist=existence)
+ if(.not. existence) then
+ self%number = SLIDENUM_DOESNOTEXIST
+ return
+ end if
+
+ ext = index(filename, ".xml", back=.true.)
+ if(ext >= 1) then
+ startnum = index(filename(1:ext), "slide", back=.true.)
+ if(startnum >= 1) then
+ startnum = startnum + 5
+ read(filename(startnum:ext-1), *, iostat=ios) self%number
+ end if
+ end if
+
+ ! Now actually load the xml
+ if(index(filename, "\") > 0) then
+ allocate(character(len=len_trim(filename))::filename_no_backslashes)
+ filename_no_backslashes = filename
+ i = index(filename_no_backslashes, '\')
+ do while(i > 0)
+ filename_no_backslashes(i:i) = '/'
+ i = index(filename_no_backslashes, '\')
+ end do
+ ! This was all very stupid...
+ self%slide_dom => parseFile(filename_no_backslashes)
+ deallocate(filename_no_backslashes)
+ else
+ self%slide_dom => parseFile(filename)
+ end if
+ if(.not. associated(self%slide_dom)) then
+ self%number = SLIDENUM_ERRORPARSING
+ return
+ end if
+
+ self%title => find_slide_title(self%slide_dom)
+ if(.not. associated(self%title) .and. self%number > 0) then
+ allocate(character(len=16)::self%title)
+ write(self%title, '(I8)') self%number
+ self%title = "Slide "//trim(adjustl(self%title))
+ end if
+
+ call parse_slide_sp(self)
+
+ ! This gets rid of double-accounting for the title
+ if(associated(self%content) .and. associated(self%title)) then
+ tmp => null()
+ walker => self%content
+ do while(associated(walker))
+ if(self%content%to_text() == "## "//self%title) then
+ if(.not.associated(tmp)) then
+ self%content => walker%next
+ else
+ tmp%next => walker%next
+ end if
+ deallocate(walker)
+ exit
+ end if
+ tmp => walker
+ walker => walker%next
+ end do
+ end if
+
+ end subroutine slide_load_filename
+
+ function element_text_length(self)
+ implicit none
+
+ class(element), intent(in)::self
+ integer::element_text_length
+ character(len=:), pointer::text_temp
+
+ text_temp => self%to_text()
+ if(associated(text_temp)) then
+ element_text_length = len_trim(text_temp)
+ deallocate(text_temp)
+ else
+ element_text_length = 0
+ end if
+
+ end function element_text_length
+
+ function heading_element_to_text(self) result(t)
+ implicit none
+
+ class(heading_element), intent(in)::self
+ character(len=:), pointer::t
+
+ t => null()
+ if(associated(self%text)) then
+ allocate(character(len=len_trim(self%text)+3) :: t)
+ t = "## "//self%text
+ end if
+
+ end function heading_element_to_text
+
+ function text_element_to_text(self) result(t)
+ implicit none
+
+ class(text_element), intent(in)::self
+ character(len=:), pointer::t
+
+ t => null()
+ if(associated(self%text)) then
+ allocate(character(len=len_trim(self%text)) :: t)
+ t = self%text
+ end if
+ end function text_element_to_text
+
+ function concatenated_text_in_row(r) result(t)
+ use FoX_dom
+ implicit none
+
+ type(Node), pointer::r
+ character(len=:), pointer::t
+ type(NodeList), pointer::text_nodes
+ type(Node), pointer::tn, bairn
+ integer::i, text_length, j
+
+ text_length = 0
+ text_nodes => getElementsByTagNameNS(r, drawing_schema, "t")
+ do i = 1, getLength(text_nodes)
+ tn => item(text_nodes, i-1)
+ do j = 1, getLength(getChildNodes(tn))
+ bairn => item(getChildNodes(tn), i-1)
+ text_length = text_length + len(getNodeValue(bairn))
+ end do
+ end do
+
+ allocate(character(len=text_length)::t)
+ text_length = 1
+ do i = 1, getLength(text_nodes)
+ tn => item(text_nodes, i-1)
+ do j = 1, getLength(getChildNodes(tn))
+ bairn => item(getChildNodes(tn), i-1)
+
+ t(text_length:len(t)) = getNodeValue(bairn)
+ text_length = text_length + len(getNodeValue(bairn))
+ end do
+
+ end do
+
+ end function concatenated_text_in_row
+
+ function concatenated_text_in_rows(rows) result(t)
+ implicit none
+
+ type(NodeList), pointer::rows
+ character(len=:), pointer::t
+
+ character(len=:), pointer::one_row_text
+ integer::i, text_length
+
+ t => null()
+ if(getLength(rows) == 0) then
+ return
+ end if
+
+ text_length = 0
+ do i = 1, getLength(rows)
+ one_row_text => concatenated_text_in_row(item(rows, i-1))
+ if(associated(one_row_text)) then
+ text_length = text_length + len(one_row_text)
+ deallocate(one_row_text)
+ end if
+ end do
+
+ allocate(character(len=text_length) :: t)
+ text_length = 1
+ do i = 1, getLength(rows)
+ one_row_text => concatenated_text_in_row(item(rows, i-1))
+ if(associated(one_row_text)) then
+ t(text_length:len(t)) = one_row_text
+ text_length = text_length + len(one_row_text)
+ deallocate(one_row_text)
+ end if
+ end do
+
+ end function concatenated_text_in_rows
+
+ subroutine text_element_from_node(self, n)
+ use FoX_dom
+ implicit none
+
+ class(text_element), intent(inout)::self
+ type(Node), pointer::n
+ type(NodeList), pointer::rows
+
+ rows => getElementsByTagNameNS(n, drawing_schema, "r")
+ self%text => concatenated_text_in_rows(rows)
+
+ end subroutine text_element_from_node
+
+ function slide_to_text(self) result(t)
+ implicit none
+
+ class(slide), intent(in)::self
+ character(len=:), pointer::t
+
+ character(len=:), pointer::one_element
+ class(element), pointer::walker
+ integer::text_length
+
+ text_length = 0
+ walker => self%content
+ do while(associated(walker))
+ text_length = text_length + walker%length() + 2*len(new_line(' '))
+ walker => walker%next
+ end do
+
+ if(associated(self%title)) then
+ text_length = text_length + 2 + len_trim(self%title) + 2*len(new_line(' ')) + 2
+ end if
+
+ allocate(character(len=text_length) :: t)
+ t = " "
+ text_length = 1
+
+ if(associated(self%title)) then
+ t = "# "//trim(self%title)//repeat(new_line(' '),2)
+ text_length = text_length + 2 + len_trim(self%title) + 2*len(new_line(' '))
+ end if
+
+ walker => self%content
+ do while(associated(walker))
+ one_element => walker%to_text()
+ if(associated(one_element)) then
+ t(text_length:len(t)) = one_element//repeat(new_line(' '),2)
+ text_length = text_length + len_trim(one_element) + 2*len(new_line(' '))
+ deallocate(one_element)
+ end if
+ walker => walker%next
+ end do
+
+ end function slide_to_text
+
+ function slide_text_length(self)
+ implicit none
+
+ class(slide), intent(in)::self
+ integer::slide_text_length
+
+ character(len=:), pointer::tmp
+
+ tmp => self%to_text()
+ if(associated(tmp)) then
+ slide_text_length = len_trim(tmp)
+ else
+ slide_text_length = 0
+ end if
+
+ end function slide_text_length
+
+ subroutine list_element_from_node(self, n)
+ use FoX_dom
+ implicit none
+
+ class(list_element), intent(inout)::self
+ type(Node), pointer::n
+
+ character(len=:), pointer::one_text
+ integer::i, j
+ type(NodeList), pointer::p_nodes, row_nodes
+ type(Node), pointer:: one_p
+
+ character(len=list_single_bullet_max_length), dimension(:), pointer::tmp
+
+ p_nodes => getElementsByTagNameNS(n, drawing_schema, "p")
+ allocate(tmp(getLength(p_nodes)))
+ tmp = " "
+
+ j = 0
+ do i = 1, getLength(p_nodes)
+ one_p => item(p_nodes, i-1)
+ row_nodes => getElementsByTagNameNS(one_p, drawing_schema, "r")
+ one_text => concatenated_text_in_rows(row_nodes)
+ if(associated(one_text)) then
+ if(len_trim(one_text) > 0) then
+ j = j + 1
+ tmp(j) = one_text
+ end if
+ deallocate(one_text)
+ end if
+ end do
+
+ ! j now holds the number of bullets with actual text
+ allocate(self%text(j))
+ self%text = tmp(1:j)
+ deallocate(tmp)
+
+ end subroutine list_element_from_node
+
+ function list_element_to_text(self) result(t)
+ implicit none
+
+ class(list_element), intent(in)::self
+ character(len=:), pointer::t
+
+ integer::i, text_length
+
+ t => null()
+ if(.not. associated(self%text)) then
+ return
+ end if
+
+ text_length = 0
+ do i = 1, size(self%text)
+ text_length = text_length + &
+ len_trim(self%text(i)) + &
+ len(new_line(' ')) + 2
+ end do
+
+ allocate(character(len=text_length) :: t)
+ t = " "
+
+ text_length = 1
+ do i = 1, size(self%text)
+ if(i < size(self%text)) then
+ t(text_length:len(t)) = "* "//trim(self%text(i))//new_line(' ')
+ text_length = text_length + len(new_line(' ')) + len_trim(self%text(i))+2
+ else
+ t(text_length:len(t)) = "* "//trim(self%text(i))
+ end if
+ end do
+
+ end function list_element_to_text
+
+end module fpoint_pptxml \ No newline at end of file
diff --git a/pptxzip.f90 b/pptxzip.f90
new file mode 100644
index 0000000..d526125
--- /dev/null
+++ b/pptxzip.f90
@@ -0,0 +1,204 @@
+! Copyright (c) 2022 Approximatrix, LLC <support@approximatrix.com>
+!
+! Permission is hereby granted, free of charge, to any person obtaining a copy
+! of this software and associated documentation files (the "Software"), to deal
+! in the Software without restriction, including without limitation the rights
+! to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+! copies of the Software, and to permit persons to whom the Software is
+! furnished to do so, subject to the following conditions:
+!
+! The above copyright notice and this permission notice shall be included in
+! all copies or substantial portions of the Software.
+!
+! The Software shall be used for Good, not Evil.
+!
+! THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+! IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+! FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+! AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+! LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+! OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+! SOFTWARE.
+
+module fpoint_pptxzip
+use fpoint_pptxml
+implicit none
+
+ private
+ public::pptxtracted
+
+ type :: pptxtracted
+
+ character(len=2048)::directory
+ character(len=2048), dimension(:), pointer::slide_paths
+ character(len=2048), dimension(:), pointer::note_paths
+
+ type(slide), dimension(:), pointer::slides
+
+ contains
+
+ procedure :: open => pptx_open
+ procedure :: close => pptx_close
+ procedure :: parse => pptx_parse
+ procedure :: to_text => pptx_to_text
+ procedure :: slide_has_notes
+ procedure :: slide_count
+
+ end type pptxtracted
+
+contains
+
+ function slide_has_notes(self, i)
+ implicit none
+
+ class(pptxtracted), intent(in)::self
+ integer, intent(in)::i
+ logical::slide_has_notes
+
+ if(i > size(self%note_paths) .or. i < 1) then
+ slide_has_notes = .false.
+ else
+ slide_has_notes = (len_trim(self%note_paths(i)) > 0)
+ end if
+
+ end function slide_has_notes
+
+ function slide_count(self)
+ implicit none
+
+ class(pptxtracted), intent(in)::self
+ integer::slide_count
+
+ if(associated(self%slide_paths)) then
+ slide_count = size(self%slide_paths)
+ else
+ slide_count = 0
+ end if
+
+ end function slide_count
+
+ function pptx_open(self, filename) result(res)
+ use fpoint_fsutil
+ use m_unzip, only: unzip
+ implicit none
+
+ logical::res
+ class(pptxtracted), intent(out)::self
+ character(len=*), intent(in)::filename
+ integer::i, slidecount
+ character(len=2048)::slidedir, slidecheck, notesdir
+ character(len=4)::numtext
+ logical::existence
+
+ call get_temporary_directory(self%directory)
+ i = index(filename, pathsep, back=.true.) + 1
+ call append_to_path(self%directory, filename(i:len_trim(filename))//".extracted")
+ call make_directory(trim(self%directory))
+
+ ! Extract the file into said directory
+ res = unzip(filename, trim(self%directory))
+
+ ! Count slides
+ slidedir = self%directory
+ call append_to_path(slidedir, 'ppt')
+ call append_to_path(slidedir, 'slides')
+ do i = 1, 512
+ slidecheck = slidedir
+ write(numtext, '(I4)') i
+ call append_to_path(slidecheck, "slide"//trim(adjustl(numtext))//".xml")
+ inquire(file=slidecheck, exist=existence)
+ if(.not. existence) then
+ exit
+ end if
+ end do
+ slidecount = i - 1
+
+ allocate(self%slide_paths(slidecount))
+ self%slide_paths = slidedir
+ do i = 1, slidecount
+ write(numtext, '(I4)') i
+ call append_to_path(self%slide_paths(i), "slide"//trim(adjustl(numtext))//".xml")
+ end do
+
+ ! Now check for notes
+ notesdir = self%directory
+ call append_to_path(notesdir, 'ppt')
+ call append_to_path(notesdir, 'notesSlides')
+ allocate(self%note_paths(slidecount))
+ self%note_paths = notesdir
+ do i = 1, slidecount
+ write(numtext, '(I4)') i
+ call append_to_path(self%note_paths(i), "notesSlide"//trim(adjustl(numtext))//".xml")
+ inquire(file=self%note_paths(i), exist=existence)
+ if(.not. existence) then
+ self%note_paths(i) = " "
+ end if
+ end do
+
+ ! These should only be explicitly parsed
+ self%slides => null()
+
+ end function pptx_open
+
+ subroutine pptx_close(self)
+ implicit none
+
+ class(pptxtracted), intent(inout)::self
+
+ ! Delete directory tree at self%directory
+
+ deallocate(self%slide_paths)
+ self%slide_paths => null()
+
+ deallocate(self%note_paths)
+ self%note_paths => null()
+
+ self%directory = " "
+
+ end subroutine pptx_close
+
+ subroutine pptx_parse(self)
+ use fpoint_pptxml
+ implicit none
+
+ class(pptxtracted), intent(inout)::self
+ integer::i
+ allocate(self%slides(self%slide_count()))
+
+ do i = 1, self%slide_count()
+ call self%slides(i)%load(self%slide_paths(i))
+ end do
+
+ end subroutine pptx_parse
+
+ function pptx_to_text(self) result(t)
+ use fpoint_pptxml
+ implicit none
+
+ class(pptxtracted), intent(in)::self
+ integer::text_length
+ character(len=:), pointer::t, tmp
+ integer::i
+
+ t => null()
+
+ if(self%slide_count() > 0 .and. associated(self%slides)) then
+ text_length = 0
+ do i = 1, self%slide_count()
+ text_length = text_length + self%slides(i)%text_length()
+ end do
+
+ allocate(character(len=text_length) :: t)
+
+ text_length = 1
+ do i = 1, self%slide_count()
+ tmp => self%slides(i)%to_text()
+ t(text_length:len(t)) = trim(tmp)
+ text_length = text_length + len_trim(tmp)
+ end do
+
+ end if
+
+ end function pptx_to_text
+
+end module fpoint_pptxzip
diff --git a/winsupport.c b/winsupport.c
new file mode 100644
index 0000000..7479b8a
--- /dev/null
+++ b/winsupport.c
@@ -0,0 +1,30 @@
+/* Copyright (c) 2022 Approximatrix, LLC <support@approximatrix.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * The Software shall be used for Good, not Evil.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <windows.h>
+
+/* I can't even believe this is necessary on win32 32-bit... */
+void make_directory_on_windows(char *cdir)
+{
+ CreateDirectory(cdir, NULL);
+}