diff options
-rw-r--r-- | LICENSE | 21 | ||||
-rw-r--r-- | README.md | 23 | ||||
-rw-r--r-- | driver.f90 | 133 | ||||
-rw-r--r-- | fpoint.prj | 78 | ||||
-rw-r--r-- | fsutil.F90 | 88 | ||||
-rw-r--r-- | pptxml.f90 | 664 | ||||
-rw-r--r-- | pptxzip.f90 | 204 | ||||
-rw-r--r-- | winsupport.c | 30 |
8 files changed, 1241 insertions, 0 deletions
@@ -0,0 +1,21 @@ +Copyright (c) 2022 Approximatrix, LLC <support@approximatrix.com> + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +The Software shall be used for Good, not Evil. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md new file mode 100644 index 0000000..63aa50c --- /dev/null +++ b/README.md @@ -0,0 +1,23 @@ +FPoint - PowerPoint to Text Converter +===================================== + +This program is a crude attempt to extract text from PowerPoint +presentations files (*.pptx only, no legacy) written almost entirely in +Fortran. The code attempts to decipher what is meant by each slide's +structure in a vague manner. The text generated, at this time, is +loosely based on gemtext, generally considered a subset of Markdown. + +Please see LICENSE for the license terms. + +Requirements +------------ + +This program requires the following: + + Fortran XML (or FoX) + https://github.com/andreww/fox + + libzip-wrapper + https://git.approximatrix.com/cgit.cgi/libzip-wrapper/ + + libzip (indirectly due to the above) diff --git a/driver.f90 b/driver.f90 new file mode 100644 index 0000000..eae0b34 --- /dev/null +++ b/driver.f90 @@ -0,0 +1,133 @@ +! Copyright (c) 2022 Approximatrix, LLC <support@approximatrix.com> +! +! Permission is hereby granted, free of charge, to any person obtaining a copy +! of this software and associated documentation files (the "Software"), to deal +! in the Software without restriction, including without limitation the rights +! to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +! copies of the Software, and to permit persons to whom the Software is +! furnished to do so, subject to the following conditions: +! +! The above copyright notice and this permission notice shall be included in +! all copies or substantial portions of the Software. +! +! The Software shall be used for Good, not Evil. +! +! THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +! IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +! FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +! AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +! LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +! OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +! SOFTWARE. + +program driver +use fpoint_pptxzip +implicit none + + type(pptxtracted)::presentation + !character(len=*), parameter::filename = "Welcome to PowerPoint.pptx" !"samplepptx.pptx" + logical::verbose + logical::notes + integer::i, j + character(len=:), pointer::filename, arg + + if(command_argument_count() < 1) then + call usage() + call exit(0) + end if + + verbose = .false. + notes = .false. + filename => null() + do i = 1, command_argument_count() + + call get_command_argument(i, length=j) + allocate(character(len=j)::arg) + call get_command_argument(i, value=arg) + + if(arg == "-n") then + notes = .true. + + else if(arg == "-v") then + verbose = .true. + + else + filename => arg + arg => null() + + end if + + if(associated(arg)) then + deallocate(arg) + arg => null() + end if + + end do + + if(verbose) then + call title() + end if + call maybe_print(verbose, "Loading "//filename) + + if(presentation%open(filename)) then + + call maybe_print(verbose, "File opened at "//trim(presentation%directory)) + + call presentation%parse() + + write(*, '(A)') presentation%to_text() + + call presentation%close() + + else + call maybe_print(verbose, "Failed on "//filename) + call exit(1) + end if + + call exit(0) + +contains + + subroutine title() + implicit none + + Print *, "FPoint - PowerPoint to Text Converter" + Print *, "Copyright 2022 Approximatrix, LLC" + + end subroutine title + + subroutine usage() + implicit none + + character(512)::prg + + call get_command_argument(0, prg) + + call title() + + Print *, " " + Print *, "Usage: "//trim(prg)//" [options] <input filename>" + Print *, " " + Print *, "Options:" + Print *, " " + Print *, " -n Extract notes instead of slide contents" + Print *, " -v Be somewhat verbose, probably not enough to be interesting" + Print *, " " + Print *, "Output is printed to standard out" + Print *, " " + + end subroutine usage + + subroutine maybe_print(v, str) + implicit none + + logical, intent(in)::v + character(len=*), intent(in)::str + + if(v) then + Print *, str + end if + end subroutine maybe_print + +end program driver +
\ No newline at end of file diff --git a/fpoint.prj b/fpoint.prj new file mode 100644 index 0000000..5d66372 --- /dev/null +++ b/fpoint.prj @@ -0,0 +1,78 @@ +{ + "Root":{ + "Folders":[], + "Name":"+fpoint (fpoint.exe)", + "Files":[{ + "filename":".\\driver.f90", + "enabled":"1" + },{ + "filename":".\\fsutil.F90", + "enabled":"1" + },{ + "filename":".\\pptxml.f90", + "enabled":"1" + },{ + "filename":".\\pptxzip.f90", + "enabled":"1" + },{ + "filename":".\\winsupport.c", + "enabled":"1" + }] + }, + "Name":"fpoint (fpoint.exe)", + "Options":{ + "Compiler Options":{ + "Fortran Flags":"-D__WIN32__", + "Link Flags":"-lFoX_dom -lFoX_sax -lFoX_utils -lFoX_common -lFoX_fsys -lfzip.dll -luser32 -lkernel32", + "C Flags":"" + }, + "Architecture":0, + "Type":0, + "Revision":2, + "Windows GUI":0, + "File Options":{ + "Library Directories":["Default Add-On Directory","../../Workspace/git/libzip-wrapper"], + "Build Directory":"build", + "Module Directory":"modules", + "Include Directories":["Default Add-On Include Directory","../../Workspace/git/libzip-wrapper/modules"] + }, + "Target":"fpoint.exe", + "Fortran Options":{ + "Use C Preprocessor":"false", + "Runtime Diagnostics":"false", + "Floating Point Exception Trap":0, + "Cray Pointers":"false", + "Enable Coarrays":"false", + "Enable OpenMP":"false", + "Initialize Variables to Zero":"false", + "Default Double for Real":"false" + }, + "Code Generation Options":{ + "CPU Specific":"false", + "Processor":"generic", + "Aggressive Loops":"false", + "Debugging":"true", + "Optimization Mode":0, + "Profiling":"false" + }, + "Build Dependencies":1, + "Launch Options":{ + "Working Directory":"", + "Launch Using MPI":"false", + "Keep Console":"true", + "External Console":"true", + "Command Line Arguments":"", + "Build Before Launch":"true" + }, + "Build Options":{ + "Auto Management":"true", + "Makefile":"Makefile.fpoint", + "Auto Makefile":"true" + }, + "Linker Options":{ + "Static Linking Mode":7, + "Link MPI Library":"false", + "Link LAPACK":0 + } + } +}
\ No newline at end of file diff --git a/fsutil.F90 b/fsutil.F90 new file mode 100644 index 0000000..3dcf20c --- /dev/null +++ b/fsutil.F90 @@ -0,0 +1,88 @@ +! Copyright (c) 2022 Approximatrix, LLC <support@approximatrix.com> +! +! Permission is hereby granted, free of charge, to any person obtaining a copy +! of this software and associated documentation files (the "Software"), to deal +! in the Software without restriction, including without limitation the rights +! to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +! copies of the Software, and to permit persons to whom the Software is +! furnished to do so, subject to the following conditions: +! +! The above copyright notice and this permission notice shall be included in +! all copies or substantial portions of the Software. +! +! The Software shall be used for Good, not Evil. +! +! THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +! IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +! FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +! AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +! LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +! OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +! SOFTWARE. + +module fpoint_fsutil +implicit none + +#ifdef __WIN32__ + character, parameter::pathsep = '\' +#else + character, parameter::pathsep = '/' +#endif + +contains + + subroutine append_to_path(path, additive) + implicit none + + character(len=*), intent(inout)::path + character(len=*), intent(in)::additive + integer::istart + + if(path(len_trim(path):len_trim(path)) /= pathsep) then + path = trim(path)//pathsep + end if + + istart = 1 + if(additive(1:1) == pathsep) then + istart = 2 + end if + + path = trim(path)//additive(istart:len_trim(additive)) + + end subroutine append_to_path + + subroutine get_temporary_directory(dir) + implicit none + + character(len=*), intent(out)::dir + + call get_environment_variable("TEMP", dir) + + end subroutine get_temporary_directory + + subroutine make_directory(dir) + use iso_c_binding + implicit none + + character(len=*), intent(in)::dir + +#ifdef __WIN32__ + character(len=:, kind=c_char), pointer::cdir + interface + subroutine make_dir(str) bind(c, name="make_directory_on_windows") + use iso_c_binding + type(c_ptr), value::str + end subroutine make_dir + end interface + + allocate(character(len=(len_trim(dir)+1))::cdir) + cdir = trim(dir)//c_null_char + call make_dir(c_loc(cdir)) + deallocate(cdir) +#else + call execute_command_line('mkdir "'//trim(dir)//'"', wait=.true.) +#endif + + end subroutine make_directory + +end module fpoint_fsutil
\ No newline at end of file diff --git a/pptxml.f90 b/pptxml.f90 new file mode 100644 index 0000000..c785bb8 --- /dev/null +++ b/pptxml.f90 @@ -0,0 +1,664 @@ +! Copyright (c) 2022 Approximatrix, LLC <support@approximatrix.com> +! +! Permission is hereby granted, free of charge, to any person obtaining a copy +! of this software and associated documentation files (the "Software"), to deal +! in the Software without restriction, including without limitation the rights +! to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +! copies of the Software, and to permit persons to whom the Software is +! furnished to do so, subject to the following conditions: +! +! The above copyright notice and this permission notice shall be included in +! all copies or substantial portions of the Software. +! +! The Software shall be used for Good, not Evil. +! +! THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +! IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +! FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +! AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +! LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +! OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +! SOFTWARE. + +module fpoint_pptxml +use FoX_dom +implicit none + + private + public::slide + + integer, parameter::list_single_bullet_max_length = 512 + + integer, parameter::SLIDENUM_UNKNOWN = 0 + integer, parameter::SLIDENUM_DOESNOTEXIST = -1 + integer, parameter::SLIDENUM_ERRORPARSING = -2 + + character(*), parameter::presentation_schema = "http://schemas.openxmlformats.org/presentationml/2006/main" + character(*), parameter::drawing_schema = "http://schemas.openxmlformats.org/drawingml/2006/main" + + type::slide + + integer::number + + character(len=:), pointer::title + + type(Node), pointer::slide_dom + class(element), pointer::content + + contains + + procedure :: load => slide_load_filename + procedure :: to_text => slide_to_text + procedure :: text_length => slide_text_length + + end type slide + + type, abstract::element + class(element), pointer :: next + contains + procedure(from_node), deferred :: from_node + procedure(to_text), deferred :: to_text + procedure :: length => element_text_length + end type element + + abstract interface + function to_text(self) + import::element + class(element), intent(in)::self + character(len=:), pointer::to_text + end function to_text + end interface + + abstract interface + subroutine from_node(self, n) + use FoX_dom + import::element + class(element), intent(inout)::self + type(Node), pointer::n + end subroutine from_node + end interface + + type, extends(element)::text_element + character(len=:), pointer::text + contains + procedure::to_text => text_element_to_text + procedure::from_node => text_element_from_node + end type text_element + + type, extends(text_element)::heading_element + contains + procedure::to_text => heading_element_to_text + end type heading_element + + type, extends(element)::list_element + character(len=list_single_bullet_max_length), dimension(:), pointer::text + contains + procedure::to_text => list_element_to_text + procedure::from_node => list_element_from_node + end type list_element + +contains + + function get_child_text(n) result(t) + use FoX_dom + implicit none + + type(Node), pointer::n, tn + character(len=:), pointer::t, resize_t + type(NodeList), pointer::text_nodes + + text_nodes => getElementsByTagNameNS(n, drawing_schema, "t") + if(getLength(text_nodes) >= 1) then + tn => item(text_nodes, 0) + allocate(character(len=1024) :: resize_t) + call extractDataContent(tn, resize_t) + allocate(character(len=len_trim(resize_t)) :: t) + t = resize_t + deallocate(resize_t) + end if + + end function get_child_text + + function find_slide_title(dom) result(t) + use FoX_dom + implicit none + + type(Node), pointer::dom + character(len=:), pointer::t + + type(NodeList), pointer::nonvis_props + type(Node), pointer::one_props, sp, nonvis_sp + character(len=6)::temp + integer::i + + t => null() + nonvis_props => getElementsByTagNameNS(dom, presentation_schema, "cNvPr") + + do i=1, getLength(nonvis_props) + one_props => item(nonvis_props, i-1) + if(hasAttribute(one_props, "name")) then + temp = getAttribute(one_props, "name") + if(temp(1:6) == "Title ") then + exit + end if + end if + one_props => null() + end do + + if(associated(one_props)) then + nonvis_sp => getParentNode(one_props) + sp => getParentNode(nonvis_sp) + if(associated(sp)) then + t => concatenated_text_in_rows(getElementsByTagNameNS(sp, drawing_schema, "r")) + end if + end if + + end function find_slide_title + + function parse_one_sp_node(n) result(e) + use FoX_dom + implicit none + + type(Node), pointer::n + class(element), pointer::e + + type(heading_element), pointer::he + type(text_element), pointer::te + type(list_element), pointer::le + + type(NodeList), pointer::nonvis_sp, bullet_defns + type(Node), pointer::prop_node + + integer::i + + e => null() + + nonvis_sp => getElementsByTagNameNS(n, presentation_schema, "cNvPr") + do i=1, getLength(nonvis_sp) + prop_node => item(nonvis_sp, i-1) + + ! Try based on property name + if(hasAttribute(prop_node, "name")) then + + ! if "Title" or "Subtitle" is present + if(index(getAttribute(prop_node, "name"), "itle") > 0) then + allocate(he) + call he%from_node(n) + e => he + exit + end if + + end if + + end do + + ! Next, check for bullets + if(.not. associated(e)) then + bullet_defns => getElementsByTagNameNS(n, drawing_schema, "cNvPr") + if(associated(bullet_defns)) then + if(getLength(bullet_defns) > 0) then + allocate(le) + call le%from_node(n) + e => le + end if + end if + end if + + if(.not. associated(e)) then + bullet_defns => getElementsByTagNameNS(n, drawing_schema, "lstStyle") + if(associated(bullet_defns)) then + if(getLength(bullet_defns) > 0) then + allocate(le) + call le%from_node(n) + e => le + end if + end if + end if + + ! Finally, try treating as a text node + if(.not. associated(e)) then + allocate(te) + call te%from_node(n) + e=>te + end if + + if(associated(e)) then + if(e%length() == 0) then + deallocate(e) + e => null() + else + e%next => null() + end if + end if + + end function parse_one_sp_node + + recursive function walk_sp_tree(parent) result(elements) + use FoX_dom + implicit none + + type(Node), pointer::parent + class(element), pointer::elements + + type(Node), pointer::one_child + type(NodeList), pointer::child_nodes + class(element), pointer::child_elements, walker + integer::i + type(DomException)::fex + character(32)::tname + + elements => null() + + child_nodes => getChildNodes(parent) + do i = 1, getLength(child_nodes) + one_child => item(child_nodes, i-1) + + child_elements => null() + + tname = getTagName(one_child, fex) + + if(.not. inException(fex)) then + ! Single sp + if(trim(tname) == "p:sp") then + child_elements => parse_one_sp_node(one_child) + + ! A group of sp's + elseif(trim(tname) == "p:grpSp") then + child_elements => walk_sp_tree(one_child) + + end if + + if(associated(child_elements)) then + + ! Append + if(associated(elements)) then + + walker => elements + do while(associated(walker%next)) + walker => walker%next + end do + walker%next => child_elements + + ! First one! + else + elements => child_elements + end if + + end if + end if + + end do + + end function walk_sp_tree + + subroutine parse_slide_sp(self) + use FoX_dom + implicit none + + class(slide), intent(inout)::self + + type(NodeList), pointer::nl + type(Node), pointer::sptree + + nl => getElementsByTagNameNS(self%slide_dom, & + presentation_schema, "spTree") + + self%content => null() + if(associated(nl)) then + if(getLength(nl) >= 1) then + sptree => item(nl, 0) + self%content => walk_sp_tree(sptree) + end if + end if + + end subroutine parse_slide_sp + + subroutine slide_load_filename(self, filename) + use FoX_dom + implicit none + + class(slide), intent(out)::self + character(len=*), intent(in)::filename + character(len=:), pointer::filename_no_backslashes + + ! For parsing the title out + type(NodeList), pointer::sp_elements + type(Node), pointer::title_parent + + integer::ext, startnum, ios + logical::existence + + integer::i + + class(element), pointer::walker, tmp + + self%number = SLIDENUM_UNKNOWN + + inquire(file=filename, exist=existence) + if(.not. existence) then + self%number = SLIDENUM_DOESNOTEXIST + return + end if + + ext = index(filename, ".xml", back=.true.) + if(ext >= 1) then + startnum = index(filename(1:ext), "slide", back=.true.) + if(startnum >= 1) then + startnum = startnum + 5 + read(filename(startnum:ext-1), *, iostat=ios) self%number + end if + end if + + ! Now actually load the xml + if(index(filename, "\") > 0) then + allocate(character(len=len_trim(filename))::filename_no_backslashes) + filename_no_backslashes = filename + i = index(filename_no_backslashes, '\') + do while(i > 0) + filename_no_backslashes(i:i) = '/' + i = index(filename_no_backslashes, '\') + end do + ! This was all very stupid... + self%slide_dom => parseFile(filename_no_backslashes) + deallocate(filename_no_backslashes) + else + self%slide_dom => parseFile(filename) + end if + if(.not. associated(self%slide_dom)) then + self%number = SLIDENUM_ERRORPARSING + return + end if + + self%title => find_slide_title(self%slide_dom) + if(.not. associated(self%title) .and. self%number > 0) then + allocate(character(len=16)::self%title) + write(self%title, '(I8)') self%number + self%title = "Slide "//trim(adjustl(self%title)) + end if + + call parse_slide_sp(self) + + ! This gets rid of double-accounting for the title + if(associated(self%content) .and. associated(self%title)) then + tmp => null() + walker => self%content + do while(associated(walker)) + if(self%content%to_text() == "## "//self%title) then + if(.not.associated(tmp)) then + self%content => walker%next + else + tmp%next => walker%next + end if + deallocate(walker) + exit + end if + tmp => walker + walker => walker%next + end do + end if + + end subroutine slide_load_filename + + function element_text_length(self) + implicit none + + class(element), intent(in)::self + integer::element_text_length + character(len=:), pointer::text_temp + + text_temp => self%to_text() + if(associated(text_temp)) then + element_text_length = len_trim(text_temp) + deallocate(text_temp) + else + element_text_length = 0 + end if + + end function element_text_length + + function heading_element_to_text(self) result(t) + implicit none + + class(heading_element), intent(in)::self + character(len=:), pointer::t + + t => null() + if(associated(self%text)) then + allocate(character(len=len_trim(self%text)+3) :: t) + t = "## "//self%text + end if + + end function heading_element_to_text + + function text_element_to_text(self) result(t) + implicit none + + class(text_element), intent(in)::self + character(len=:), pointer::t + + t => null() + if(associated(self%text)) then + allocate(character(len=len_trim(self%text)) :: t) + t = self%text + end if + end function text_element_to_text + + function concatenated_text_in_row(r) result(t) + use FoX_dom + implicit none + + type(Node), pointer::r + character(len=:), pointer::t + type(NodeList), pointer::text_nodes + type(Node), pointer::tn, bairn + integer::i, text_length, j + + text_length = 0 + text_nodes => getElementsByTagNameNS(r, drawing_schema, "t") + do i = 1, getLength(text_nodes) + tn => item(text_nodes, i-1) + do j = 1, getLength(getChildNodes(tn)) + bairn => item(getChildNodes(tn), i-1) + text_length = text_length + len(getNodeValue(bairn)) + end do + end do + + allocate(character(len=text_length)::t) + text_length = 1 + do i = 1, getLength(text_nodes) + tn => item(text_nodes, i-1) + do j = 1, getLength(getChildNodes(tn)) + bairn => item(getChildNodes(tn), i-1) + + t(text_length:len(t)) = getNodeValue(bairn) + text_length = text_length + len(getNodeValue(bairn)) + end do + + end do + + end function concatenated_text_in_row + + function concatenated_text_in_rows(rows) result(t) + implicit none + + type(NodeList), pointer::rows + character(len=:), pointer::t + + character(len=:), pointer::one_row_text + integer::i, text_length + + t => null() + if(getLength(rows) == 0) then + return + end if + + text_length = 0 + do i = 1, getLength(rows) + one_row_text => concatenated_text_in_row(item(rows, i-1)) + if(associated(one_row_text)) then + text_length = text_length + len(one_row_text) + deallocate(one_row_text) + end if + end do + + allocate(character(len=text_length) :: t) + text_length = 1 + do i = 1, getLength(rows) + one_row_text => concatenated_text_in_row(item(rows, i-1)) + if(associated(one_row_text)) then + t(text_length:len(t)) = one_row_text + text_length = text_length + len(one_row_text) + deallocate(one_row_text) + end if + end do + + end function concatenated_text_in_rows + + subroutine text_element_from_node(self, n) + use FoX_dom + implicit none + + class(text_element), intent(inout)::self + type(Node), pointer::n + type(NodeList), pointer::rows + + rows => getElementsByTagNameNS(n, drawing_schema, "r") + self%text => concatenated_text_in_rows(rows) + + end subroutine text_element_from_node + + function slide_to_text(self) result(t) + implicit none + + class(slide), intent(in)::self + character(len=:), pointer::t + + character(len=:), pointer::one_element + class(element), pointer::walker + integer::text_length + + text_length = 0 + walker => self%content + do while(associated(walker)) + text_length = text_length + walker%length() + 2*len(new_line(' ')) + walker => walker%next + end do + + if(associated(self%title)) then + text_length = text_length + 2 + len_trim(self%title) + 2*len(new_line(' ')) + 2 + end if + + allocate(character(len=text_length) :: t) + t = " " + text_length = 1 + + if(associated(self%title)) then + t = "# "//trim(self%title)//repeat(new_line(' '),2) + text_length = text_length + 2 + len_trim(self%title) + 2*len(new_line(' ')) + end if + + walker => self%content + do while(associated(walker)) + one_element => walker%to_text() + if(associated(one_element)) then + t(text_length:len(t)) = one_element//repeat(new_line(' '),2) + text_length = text_length + len_trim(one_element) + 2*len(new_line(' ')) + deallocate(one_element) + end if + walker => walker%next + end do + + end function slide_to_text + + function slide_text_length(self) + implicit none + + class(slide), intent(in)::self + integer::slide_text_length + + character(len=:), pointer::tmp + + tmp => self%to_text() + if(associated(tmp)) then + slide_text_length = len_trim(tmp) + else + slide_text_length = 0 + end if + + end function slide_text_length + + subroutine list_element_from_node(self, n) + use FoX_dom + implicit none + + class(list_element), intent(inout)::self + type(Node), pointer::n + + character(len=:), pointer::one_text + integer::i, j + type(NodeList), pointer::p_nodes, row_nodes + type(Node), pointer:: one_p + + character(len=list_single_bullet_max_length), dimension(:), pointer::tmp + + p_nodes => getElementsByTagNameNS(n, drawing_schema, "p") + allocate(tmp(getLength(p_nodes))) + tmp = " " + + j = 0 + do i = 1, getLength(p_nodes) + one_p => item(p_nodes, i-1) + row_nodes => getElementsByTagNameNS(one_p, drawing_schema, "r") + one_text => concatenated_text_in_rows(row_nodes) + if(associated(one_text)) then + if(len_trim(one_text) > 0) then + j = j + 1 + tmp(j) = one_text + end if + deallocate(one_text) + end if + end do + + ! j now holds the number of bullets with actual text + allocate(self%text(j)) + self%text = tmp(1:j) + deallocate(tmp) + + end subroutine list_element_from_node + + function list_element_to_text(self) result(t) + implicit none + + class(list_element), intent(in)::self + character(len=:), pointer::t + + integer::i, text_length + + t => null() + if(.not. associated(self%text)) then + return + end if + + text_length = 0 + do i = 1, size(self%text) + text_length = text_length + & + len_trim(self%text(i)) + & + len(new_line(' ')) + 2 + end do + + allocate(character(len=text_length) :: t) + t = " " + + text_length = 1 + do i = 1, size(self%text) + if(i < size(self%text)) then + t(text_length:len(t)) = "* "//trim(self%text(i))//new_line(' ') + text_length = text_length + len(new_line(' ')) + len_trim(self%text(i))+2 + else + t(text_length:len(t)) = "* "//trim(self%text(i)) + end if + end do + + end function list_element_to_text + +end module fpoint_pptxml
\ No newline at end of file diff --git a/pptxzip.f90 b/pptxzip.f90 new file mode 100644 index 0000000..d526125 --- /dev/null +++ b/pptxzip.f90 @@ -0,0 +1,204 @@ +! Copyright (c) 2022 Approximatrix, LLC <support@approximatrix.com> +! +! Permission is hereby granted, free of charge, to any person obtaining a copy +! of this software and associated documentation files (the "Software"), to deal +! in the Software without restriction, including without limitation the rights +! to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +! copies of the Software, and to permit persons to whom the Software is +! furnished to do so, subject to the following conditions: +! +! The above copyright notice and this permission notice shall be included in +! all copies or substantial portions of the Software. +! +! The Software shall be used for Good, not Evil. +! +! THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +! IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +! FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +! AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +! LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +! OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +! SOFTWARE. + +module fpoint_pptxzip +use fpoint_pptxml +implicit none + + private + public::pptxtracted + + type :: pptxtracted + + character(len=2048)::directory + character(len=2048), dimension(:), pointer::slide_paths + character(len=2048), dimension(:), pointer::note_paths + + type(slide), dimension(:), pointer::slides + + contains + + procedure :: open => pptx_open + procedure :: close => pptx_close + procedure :: parse => pptx_parse + procedure :: to_text => pptx_to_text + procedure :: slide_has_notes + procedure :: slide_count + + end type pptxtracted + +contains + + function slide_has_notes(self, i) + implicit none + + class(pptxtracted), intent(in)::self + integer, intent(in)::i + logical::slide_has_notes + + if(i > size(self%note_paths) .or. i < 1) then + slide_has_notes = .false. + else + slide_has_notes = (len_trim(self%note_paths(i)) > 0) + end if + + end function slide_has_notes + + function slide_count(self) + implicit none + + class(pptxtracted), intent(in)::self + integer::slide_count + + if(associated(self%slide_paths)) then + slide_count = size(self%slide_paths) + else + slide_count = 0 + end if + + end function slide_count + + function pptx_open(self, filename) result(res) + use fpoint_fsutil + use m_unzip, only: unzip + implicit none + + logical::res + class(pptxtracted), intent(out)::self + character(len=*), intent(in)::filename + integer::i, slidecount + character(len=2048)::slidedir, slidecheck, notesdir + character(len=4)::numtext + logical::existence + + call get_temporary_directory(self%directory) + i = index(filename, pathsep, back=.true.) + 1 + call append_to_path(self%directory, filename(i:len_trim(filename))//".extracted") + call make_directory(trim(self%directory)) + + ! Extract the file into said directory + res = unzip(filename, trim(self%directory)) + + ! Count slides + slidedir = self%directory + call append_to_path(slidedir, 'ppt') + call append_to_path(slidedir, 'slides') + do i = 1, 512 + slidecheck = slidedir + write(numtext, '(I4)') i + call append_to_path(slidecheck, "slide"//trim(adjustl(numtext))//".xml") + inquire(file=slidecheck, exist=existence) + if(.not. existence) then + exit + end if + end do + slidecount = i - 1 + + allocate(self%slide_paths(slidecount)) + self%slide_paths = slidedir + do i = 1, slidecount + write(numtext, '(I4)') i + call append_to_path(self%slide_paths(i), "slide"//trim(adjustl(numtext))//".xml") + end do + + ! Now check for notes + notesdir = self%directory + call append_to_path(notesdir, 'ppt') + call append_to_path(notesdir, 'notesSlides') + allocate(self%note_paths(slidecount)) + self%note_paths = notesdir + do i = 1, slidecount + write(numtext, '(I4)') i + call append_to_path(self%note_paths(i), "notesSlide"//trim(adjustl(numtext))//".xml") + inquire(file=self%note_paths(i), exist=existence) + if(.not. existence) then + self%note_paths(i) = " " + end if + end do + + ! These should only be explicitly parsed + self%slides => null() + + end function pptx_open + + subroutine pptx_close(self) + implicit none + + class(pptxtracted), intent(inout)::self + + ! Delete directory tree at self%directory + + deallocate(self%slide_paths) + self%slide_paths => null() + + deallocate(self%note_paths) + self%note_paths => null() + + self%directory = " " + + end subroutine pptx_close + + subroutine pptx_parse(self) + use fpoint_pptxml + implicit none + + class(pptxtracted), intent(inout)::self + integer::i + allocate(self%slides(self%slide_count())) + + do i = 1, self%slide_count() + call self%slides(i)%load(self%slide_paths(i)) + end do + + end subroutine pptx_parse + + function pptx_to_text(self) result(t) + use fpoint_pptxml + implicit none + + class(pptxtracted), intent(in)::self + integer::text_length + character(len=:), pointer::t, tmp + integer::i + + t => null() + + if(self%slide_count() > 0 .and. associated(self%slides)) then + text_length = 0 + do i = 1, self%slide_count() + text_length = text_length + self%slides(i)%text_length() + end do + + allocate(character(len=text_length) :: t) + + text_length = 1 + do i = 1, self%slide_count() + tmp => self%slides(i)%to_text() + t(text_length:len(t)) = trim(tmp) + text_length = text_length + len_trim(tmp) + end do + + end if + + end function pptx_to_text + +end module fpoint_pptxzip diff --git a/winsupport.c b/winsupport.c new file mode 100644 index 0000000..7479b8a --- /dev/null +++ b/winsupport.c @@ -0,0 +1,30 @@ +/* Copyright (c) 2022 Approximatrix, LLC <support@approximatrix.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * The Software shall be used for Good, not Evil. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <windows.h> + +/* I can't even believe this is necessary on win32 32-bit... */ +void make_directory_on_windows(char *cdir) +{ + CreateDirectory(cdir, NULL); +} |