From 452ceb636e37fab2927c688e7b495841879ea29a Mon Sep 17 00:00:00 2001 From: Jeffrey Armstrong Date: Fri, 25 Feb 2022 17:22:43 -0500 Subject: Initial code commit --- LICENSE | 21 ++ README.md | 23 +++ driver.f90 | 133 ++++++++++++ fpoint.prj | 78 +++++++ fsutil.F90 | 88 ++++++++ pptxml.f90 | 664 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ pptxzip.f90 | 204 ++++++++++++++++++ winsupport.c | 30 +++ 8 files changed, 1241 insertions(+) create mode 100644 LICENSE create mode 100644 README.md create mode 100644 driver.f90 create mode 100644 fpoint.prj create mode 100644 fsutil.F90 create mode 100644 pptxml.f90 create mode 100644 pptxzip.f90 create mode 100644 winsupport.c diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..fd3b407 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +Copyright (c) 2022 Approximatrix, LLC + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +The Software shall be used for Good, not Evil. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md new file mode 100644 index 0000000..63aa50c --- /dev/null +++ b/README.md @@ -0,0 +1,23 @@ +FPoint - PowerPoint to Text Converter +===================================== + +This program is a crude attempt to extract text from PowerPoint +presentations files (*.pptx only, no legacy) written almost entirely in +Fortran. The code attempts to decipher what is meant by each slide's +structure in a vague manner. The text generated, at this time, is +loosely based on gemtext, generally considered a subset of Markdown. + +Please see LICENSE for the license terms. + +Requirements +------------ + +This program requires the following: + + Fortran XML (or FoX) + https://github.com/andreww/fox + + libzip-wrapper + https://git.approximatrix.com/cgit.cgi/libzip-wrapper/ + + libzip (indirectly due to the above) diff --git a/driver.f90 b/driver.f90 new file mode 100644 index 0000000..eae0b34 --- /dev/null +++ b/driver.f90 @@ -0,0 +1,133 @@ +! Copyright (c) 2022 Approximatrix, LLC +! +! Permission is hereby granted, free of charge, to any person obtaining a copy +! of this software and associated documentation files (the "Software"), to deal +! in the Software without restriction, including without limitation the rights +! to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +! copies of the Software, and to permit persons to whom the Software is +! furnished to do so, subject to the following conditions: +! +! The above copyright notice and this permission notice shall be included in +! all copies or substantial portions of the Software. +! +! The Software shall be used for Good, not Evil. +! +! THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +! IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +! FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +! AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +! LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +! OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +! SOFTWARE. + +program driver +use fpoint_pptxzip +implicit none + + type(pptxtracted)::presentation + !character(len=*), parameter::filename = "Welcome to PowerPoint.pptx" !"samplepptx.pptx" + logical::verbose + logical::notes + integer::i, j + character(len=:), pointer::filename, arg + + if(command_argument_count() < 1) then + call usage() + call exit(0) + end if + + verbose = .false. + notes = .false. + filename => null() + do i = 1, command_argument_count() + + call get_command_argument(i, length=j) + allocate(character(len=j)::arg) + call get_command_argument(i, value=arg) + + if(arg == "-n") then + notes = .true. + + else if(arg == "-v") then + verbose = .true. + + else + filename => arg + arg => null() + + end if + + if(associated(arg)) then + deallocate(arg) + arg => null() + end if + + end do + + if(verbose) then + call title() + end if + call maybe_print(verbose, "Loading "//filename) + + if(presentation%open(filename)) then + + call maybe_print(verbose, "File opened at "//trim(presentation%directory)) + + call presentation%parse() + + write(*, '(A)') presentation%to_text() + + call presentation%close() + + else + call maybe_print(verbose, "Failed on "//filename) + call exit(1) + end if + + call exit(0) + +contains + + subroutine title() + implicit none + + Print *, "FPoint - PowerPoint to Text Converter" + Print *, "Copyright 2022 Approximatrix, LLC" + + end subroutine title + + subroutine usage() + implicit none + + character(512)::prg + + call get_command_argument(0, prg) + + call title() + + Print *, " " + Print *, "Usage: "//trim(prg)//" [options] " + Print *, " " + Print *, "Options:" + Print *, " " + Print *, " -n Extract notes instead of slide contents" + Print *, " -v Be somewhat verbose, probably not enough to be interesting" + Print *, " " + Print *, "Output is printed to standard out" + Print *, " " + + end subroutine usage + + subroutine maybe_print(v, str) + implicit none + + logical, intent(in)::v + character(len=*), intent(in)::str + + if(v) then + Print *, str + end if + end subroutine maybe_print + +end program driver + \ No newline at end of file diff --git a/fpoint.prj b/fpoint.prj new file mode 100644 index 0000000..5d66372 --- /dev/null +++ b/fpoint.prj @@ -0,0 +1,78 @@ +{ + "Root":{ + "Folders":[], + "Name":"+fpoint (fpoint.exe)", + "Files":[{ + "filename":".\\driver.f90", + "enabled":"1" + },{ + "filename":".\\fsutil.F90", + "enabled":"1" + },{ + "filename":".\\pptxml.f90", + "enabled":"1" + },{ + "filename":".\\pptxzip.f90", + "enabled":"1" + },{ + "filename":".\\winsupport.c", + "enabled":"1" + }] + }, + "Name":"fpoint (fpoint.exe)", + "Options":{ + "Compiler Options":{ + "Fortran Flags":"-D__WIN32__", + "Link Flags":"-lFoX_dom -lFoX_sax -lFoX_utils -lFoX_common -lFoX_fsys -lfzip.dll -luser32 -lkernel32", + "C Flags":"" + }, + "Architecture":0, + "Type":0, + "Revision":2, + "Windows GUI":0, + "File Options":{ + "Library Directories":["Default Add-On Directory","../../Workspace/git/libzip-wrapper"], + "Build Directory":"build", + "Module Directory":"modules", + "Include Directories":["Default Add-On Include Directory","../../Workspace/git/libzip-wrapper/modules"] + }, + "Target":"fpoint.exe", + "Fortran Options":{ + "Use C Preprocessor":"false", + "Runtime Diagnostics":"false", + "Floating Point Exception Trap":0, + "Cray Pointers":"false", + "Enable Coarrays":"false", + "Enable OpenMP":"false", + "Initialize Variables to Zero":"false", + "Default Double for Real":"false" + }, + "Code Generation Options":{ + "CPU Specific":"false", + "Processor":"generic", + "Aggressive Loops":"false", + "Debugging":"true", + "Optimization Mode":0, + "Profiling":"false" + }, + "Build Dependencies":1, + "Launch Options":{ + "Working Directory":"", + "Launch Using MPI":"false", + "Keep Console":"true", + "External Console":"true", + "Command Line Arguments":"", + "Build Before Launch":"true" + }, + "Build Options":{ + "Auto Management":"true", + "Makefile":"Makefile.fpoint", + "Auto Makefile":"true" + }, + "Linker Options":{ + "Static Linking Mode":7, + "Link MPI Library":"false", + "Link LAPACK":0 + } + } +} \ No newline at end of file diff --git a/fsutil.F90 b/fsutil.F90 new file mode 100644 index 0000000..3dcf20c --- /dev/null +++ b/fsutil.F90 @@ -0,0 +1,88 @@ +! Copyright (c) 2022 Approximatrix, LLC +! +! Permission is hereby granted, free of charge, to any person obtaining a copy +! of this software and associated documentation files (the "Software"), to deal +! in the Software without restriction, including without limitation the rights +! to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +! copies of the Software, and to permit persons to whom the Software is +! furnished to do so, subject to the following conditions: +! +! The above copyright notice and this permission notice shall be included in +! all copies or substantial portions of the Software. +! +! The Software shall be used for Good, not Evil. +! +! THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +! IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +! FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +! AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +! LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +! OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +! SOFTWARE. + +module fpoint_fsutil +implicit none + +#ifdef __WIN32__ + character, parameter::pathsep = '\' +#else + character, parameter::pathsep = '/' +#endif + +contains + + subroutine append_to_path(path, additive) + implicit none + + character(len=*), intent(inout)::path + character(len=*), intent(in)::additive + integer::istart + + if(path(len_trim(path):len_trim(path)) /= pathsep) then + path = trim(path)//pathsep + end if + + istart = 1 + if(additive(1:1) == pathsep) then + istart = 2 + end if + + path = trim(path)//additive(istart:len_trim(additive)) + + end subroutine append_to_path + + subroutine get_temporary_directory(dir) + implicit none + + character(len=*), intent(out)::dir + + call get_environment_variable("TEMP", dir) + + end subroutine get_temporary_directory + + subroutine make_directory(dir) + use iso_c_binding + implicit none + + character(len=*), intent(in)::dir + +#ifdef __WIN32__ + character(len=:, kind=c_char), pointer::cdir + interface + subroutine make_dir(str) bind(c, name="make_directory_on_windows") + use iso_c_binding + type(c_ptr), value::str + end subroutine make_dir + end interface + + allocate(character(len=(len_trim(dir)+1))::cdir) + cdir = trim(dir)//c_null_char + call make_dir(c_loc(cdir)) + deallocate(cdir) +#else + call execute_command_line('mkdir "'//trim(dir)//'"', wait=.true.) +#endif + + end subroutine make_directory + +end module fpoint_fsutil \ No newline at end of file diff --git a/pptxml.f90 b/pptxml.f90 new file mode 100644 index 0000000..c785bb8 --- /dev/null +++ b/pptxml.f90 @@ -0,0 +1,664 @@ +! Copyright (c) 2022 Approximatrix, LLC +! +! Permission is hereby granted, free of charge, to any person obtaining a copy +! of this software and associated documentation files (the "Software"), to deal +! in the Software without restriction, including without limitation the rights +! to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +! copies of the Software, and to permit persons to whom the Software is +! furnished to do so, subject to the following conditions: +! +! The above copyright notice and this permission notice shall be included in +! all copies or substantial portions of the Software. +! +! The Software shall be used for Good, not Evil. +! +! THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +! IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +! FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +! AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +! LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +! OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +! SOFTWARE. + +module fpoint_pptxml +use FoX_dom +implicit none + + private + public::slide + + integer, parameter::list_single_bullet_max_length = 512 + + integer, parameter::SLIDENUM_UNKNOWN = 0 + integer, parameter::SLIDENUM_DOESNOTEXIST = -1 + integer, parameter::SLIDENUM_ERRORPARSING = -2 + + character(*), parameter::presentation_schema = "http://schemas.openxmlformats.org/presentationml/2006/main" + character(*), parameter::drawing_schema = "http://schemas.openxmlformats.org/drawingml/2006/main" + + type::slide + + integer::number + + character(len=:), pointer::title + + type(Node), pointer::slide_dom + class(element), pointer::content + + contains + + procedure :: load => slide_load_filename + procedure :: to_text => slide_to_text + procedure :: text_length => slide_text_length + + end type slide + + type, abstract::element + class(element), pointer :: next + contains + procedure(from_node), deferred :: from_node + procedure(to_text), deferred :: to_text + procedure :: length => element_text_length + end type element + + abstract interface + function to_text(self) + import::element + class(element), intent(in)::self + character(len=:), pointer::to_text + end function to_text + end interface + + abstract interface + subroutine from_node(self, n) + use FoX_dom + import::element + class(element), intent(inout)::self + type(Node), pointer::n + end subroutine from_node + end interface + + type, extends(element)::text_element + character(len=:), pointer::text + contains + procedure::to_text => text_element_to_text + procedure::from_node => text_element_from_node + end type text_element + + type, extends(text_element)::heading_element + contains + procedure::to_text => heading_element_to_text + end type heading_element + + type, extends(element)::list_element + character(len=list_single_bullet_max_length), dimension(:), pointer::text + contains + procedure::to_text => list_element_to_text + procedure::from_node => list_element_from_node + end type list_element + +contains + + function get_child_text(n) result(t) + use FoX_dom + implicit none + + type(Node), pointer::n, tn + character(len=:), pointer::t, resize_t + type(NodeList), pointer::text_nodes + + text_nodes => getElementsByTagNameNS(n, drawing_schema, "t") + if(getLength(text_nodes) >= 1) then + tn => item(text_nodes, 0) + allocate(character(len=1024) :: resize_t) + call extractDataContent(tn, resize_t) + allocate(character(len=len_trim(resize_t)) :: t) + t = resize_t + deallocate(resize_t) + end if + + end function get_child_text + + function find_slide_title(dom) result(t) + use FoX_dom + implicit none + + type(Node), pointer::dom + character(len=:), pointer::t + + type(NodeList), pointer::nonvis_props + type(Node), pointer::one_props, sp, nonvis_sp + character(len=6)::temp + integer::i + + t => null() + nonvis_props => getElementsByTagNameNS(dom, presentation_schema, "cNvPr") + + do i=1, getLength(nonvis_props) + one_props => item(nonvis_props, i-1) + if(hasAttribute(one_props, "name")) then + temp = getAttribute(one_props, "name") + if(temp(1:6) == "Title ") then + exit + end if + end if + one_props => null() + end do + + if(associated(one_props)) then + nonvis_sp => getParentNode(one_props) + sp => getParentNode(nonvis_sp) + if(associated(sp)) then + t => concatenated_text_in_rows(getElementsByTagNameNS(sp, drawing_schema, "r")) + end if + end if + + end function find_slide_title + + function parse_one_sp_node(n) result(e) + use FoX_dom + implicit none + + type(Node), pointer::n + class(element), pointer::e + + type(heading_element), pointer::he + type(text_element), pointer::te + type(list_element), pointer::le + + type(NodeList), pointer::nonvis_sp, bullet_defns + type(Node), pointer::prop_node + + integer::i + + e => null() + + nonvis_sp => getElementsByTagNameNS(n, presentation_schema, "cNvPr") + do i=1, getLength(nonvis_sp) + prop_node => item(nonvis_sp, i-1) + + ! Try based on property name + if(hasAttribute(prop_node, "name")) then + + ! if "Title" or "Subtitle" is present + if(index(getAttribute(prop_node, "name"), "itle") > 0) then + allocate(he) + call he%from_node(n) + e => he + exit + end if + + end if + + end do + + ! Next, check for bullets + if(.not. associated(e)) then + bullet_defns => getElementsByTagNameNS(n, drawing_schema, "cNvPr") + if(associated(bullet_defns)) then + if(getLength(bullet_defns) > 0) then + allocate(le) + call le%from_node(n) + e => le + end if + end if + end if + + if(.not. associated(e)) then + bullet_defns => getElementsByTagNameNS(n, drawing_schema, "lstStyle") + if(associated(bullet_defns)) then + if(getLength(bullet_defns) > 0) then + allocate(le) + call le%from_node(n) + e => le + end if + end if + end if + + ! Finally, try treating as a text node + if(.not. associated(e)) then + allocate(te) + call te%from_node(n) + e=>te + end if + + if(associated(e)) then + if(e%length() == 0) then + deallocate(e) + e => null() + else + e%next => null() + end if + end if + + end function parse_one_sp_node + + recursive function walk_sp_tree(parent) result(elements) + use FoX_dom + implicit none + + type(Node), pointer::parent + class(element), pointer::elements + + type(Node), pointer::one_child + type(NodeList), pointer::child_nodes + class(element), pointer::child_elements, walker + integer::i + type(DomException)::fex + character(32)::tname + + elements => null() + + child_nodes => getChildNodes(parent) + do i = 1, getLength(child_nodes) + one_child => item(child_nodes, i-1) + + child_elements => null() + + tname = getTagName(one_child, fex) + + if(.not. inException(fex)) then + ! Single sp + if(trim(tname) == "p:sp") then + child_elements => parse_one_sp_node(one_child) + + ! A group of sp's + elseif(trim(tname) == "p:grpSp") then + child_elements => walk_sp_tree(one_child) + + end if + + if(associated(child_elements)) then + + ! Append + if(associated(elements)) then + + walker => elements + do while(associated(walker%next)) + walker => walker%next + end do + walker%next => child_elements + + ! First one! + else + elements => child_elements + end if + + end if + end if + + end do + + end function walk_sp_tree + + subroutine parse_slide_sp(self) + use FoX_dom + implicit none + + class(slide), intent(inout)::self + + type(NodeList), pointer::nl + type(Node), pointer::sptree + + nl => getElementsByTagNameNS(self%slide_dom, & + presentation_schema, "spTree") + + self%content => null() + if(associated(nl)) then + if(getLength(nl) >= 1) then + sptree => item(nl, 0) + self%content => walk_sp_tree(sptree) + end if + end if + + end subroutine parse_slide_sp + + subroutine slide_load_filename(self, filename) + use FoX_dom + implicit none + + class(slide), intent(out)::self + character(len=*), intent(in)::filename + character(len=:), pointer::filename_no_backslashes + + ! For parsing the title out + type(NodeList), pointer::sp_elements + type(Node), pointer::title_parent + + integer::ext, startnum, ios + logical::existence + + integer::i + + class(element), pointer::walker, tmp + + self%number = SLIDENUM_UNKNOWN + + inquire(file=filename, exist=existence) + if(.not. existence) then + self%number = SLIDENUM_DOESNOTEXIST + return + end if + + ext = index(filename, ".xml", back=.true.) + if(ext >= 1) then + startnum = index(filename(1:ext), "slide", back=.true.) + if(startnum >= 1) then + startnum = startnum + 5 + read(filename(startnum:ext-1), *, iostat=ios) self%number + end if + end if + + ! Now actually load the xml + if(index(filename, "\") > 0) then + allocate(character(len=len_trim(filename))::filename_no_backslashes) + filename_no_backslashes = filename + i = index(filename_no_backslashes, '\') + do while(i > 0) + filename_no_backslashes(i:i) = '/' + i = index(filename_no_backslashes, '\') + end do + ! This was all very stupid... + self%slide_dom => parseFile(filename_no_backslashes) + deallocate(filename_no_backslashes) + else + self%slide_dom => parseFile(filename) + end if + if(.not. associated(self%slide_dom)) then + self%number = SLIDENUM_ERRORPARSING + return + end if + + self%title => find_slide_title(self%slide_dom) + if(.not. associated(self%title) .and. self%number > 0) then + allocate(character(len=16)::self%title) + write(self%title, '(I8)') self%number + self%title = "Slide "//trim(adjustl(self%title)) + end if + + call parse_slide_sp(self) + + ! This gets rid of double-accounting for the title + if(associated(self%content) .and. associated(self%title)) then + tmp => null() + walker => self%content + do while(associated(walker)) + if(self%content%to_text() == "## "//self%title) then + if(.not.associated(tmp)) then + self%content => walker%next + else + tmp%next => walker%next + end if + deallocate(walker) + exit + end if + tmp => walker + walker => walker%next + end do + end if + + end subroutine slide_load_filename + + function element_text_length(self) + implicit none + + class(element), intent(in)::self + integer::element_text_length + character(len=:), pointer::text_temp + + text_temp => self%to_text() + if(associated(text_temp)) then + element_text_length = len_trim(text_temp) + deallocate(text_temp) + else + element_text_length = 0 + end if + + end function element_text_length + + function heading_element_to_text(self) result(t) + implicit none + + class(heading_element), intent(in)::self + character(len=:), pointer::t + + t => null() + if(associated(self%text)) then + allocate(character(len=len_trim(self%text)+3) :: t) + t = "## "//self%text + end if + + end function heading_element_to_text + + function text_element_to_text(self) result(t) + implicit none + + class(text_element), intent(in)::self + character(len=:), pointer::t + + t => null() + if(associated(self%text)) then + allocate(character(len=len_trim(self%text)) :: t) + t = self%text + end if + end function text_element_to_text + + function concatenated_text_in_row(r) result(t) + use FoX_dom + implicit none + + type(Node), pointer::r + character(len=:), pointer::t + type(NodeList), pointer::text_nodes + type(Node), pointer::tn, bairn + integer::i, text_length, j + + text_length = 0 + text_nodes => getElementsByTagNameNS(r, drawing_schema, "t") + do i = 1, getLength(text_nodes) + tn => item(text_nodes, i-1) + do j = 1, getLength(getChildNodes(tn)) + bairn => item(getChildNodes(tn), i-1) + text_length = text_length + len(getNodeValue(bairn)) + end do + end do + + allocate(character(len=text_length)::t) + text_length = 1 + do i = 1, getLength(text_nodes) + tn => item(text_nodes, i-1) + do j = 1, getLength(getChildNodes(tn)) + bairn => item(getChildNodes(tn), i-1) + + t(text_length:len(t)) = getNodeValue(bairn) + text_length = text_length + len(getNodeValue(bairn)) + end do + + end do + + end function concatenated_text_in_row + + function concatenated_text_in_rows(rows) result(t) + implicit none + + type(NodeList), pointer::rows + character(len=:), pointer::t + + character(len=:), pointer::one_row_text + integer::i, text_length + + t => null() + if(getLength(rows) == 0) then + return + end if + + text_length = 0 + do i = 1, getLength(rows) + one_row_text => concatenated_text_in_row(item(rows, i-1)) + if(associated(one_row_text)) then + text_length = text_length + len(one_row_text) + deallocate(one_row_text) + end if + end do + + allocate(character(len=text_length) :: t) + text_length = 1 + do i = 1, getLength(rows) + one_row_text => concatenated_text_in_row(item(rows, i-1)) + if(associated(one_row_text)) then + t(text_length:len(t)) = one_row_text + text_length = text_length + len(one_row_text) + deallocate(one_row_text) + end if + end do + + end function concatenated_text_in_rows + + subroutine text_element_from_node(self, n) + use FoX_dom + implicit none + + class(text_element), intent(inout)::self + type(Node), pointer::n + type(NodeList), pointer::rows + + rows => getElementsByTagNameNS(n, drawing_schema, "r") + self%text => concatenated_text_in_rows(rows) + + end subroutine text_element_from_node + + function slide_to_text(self) result(t) + implicit none + + class(slide), intent(in)::self + character(len=:), pointer::t + + character(len=:), pointer::one_element + class(element), pointer::walker + integer::text_length + + text_length = 0 + walker => self%content + do while(associated(walker)) + text_length = text_length + walker%length() + 2*len(new_line(' ')) + walker => walker%next + end do + + if(associated(self%title)) then + text_length = text_length + 2 + len_trim(self%title) + 2*len(new_line(' ')) + 2 + end if + + allocate(character(len=text_length) :: t) + t = " " + text_length = 1 + + if(associated(self%title)) then + t = "# "//trim(self%title)//repeat(new_line(' '),2) + text_length = text_length + 2 + len_trim(self%title) + 2*len(new_line(' ')) + end if + + walker => self%content + do while(associated(walker)) + one_element => walker%to_text() + if(associated(one_element)) then + t(text_length:len(t)) = one_element//repeat(new_line(' '),2) + text_length = text_length + len_trim(one_element) + 2*len(new_line(' ')) + deallocate(one_element) + end if + walker => walker%next + end do + + end function slide_to_text + + function slide_text_length(self) + implicit none + + class(slide), intent(in)::self + integer::slide_text_length + + character(len=:), pointer::tmp + + tmp => self%to_text() + if(associated(tmp)) then + slide_text_length = len_trim(tmp) + else + slide_text_length = 0 + end if + + end function slide_text_length + + subroutine list_element_from_node(self, n) + use FoX_dom + implicit none + + class(list_element), intent(inout)::self + type(Node), pointer::n + + character(len=:), pointer::one_text + integer::i, j + type(NodeList), pointer::p_nodes, row_nodes + type(Node), pointer:: one_p + + character(len=list_single_bullet_max_length), dimension(:), pointer::tmp + + p_nodes => getElementsByTagNameNS(n, drawing_schema, "p") + allocate(tmp(getLength(p_nodes))) + tmp = " " + + j = 0 + do i = 1, getLength(p_nodes) + one_p => item(p_nodes, i-1) + row_nodes => getElementsByTagNameNS(one_p, drawing_schema, "r") + one_text => concatenated_text_in_rows(row_nodes) + if(associated(one_text)) then + if(len_trim(one_text) > 0) then + j = j + 1 + tmp(j) = one_text + end if + deallocate(one_text) + end if + end do + + ! j now holds the number of bullets with actual text + allocate(self%text(j)) + self%text = tmp(1:j) + deallocate(tmp) + + end subroutine list_element_from_node + + function list_element_to_text(self) result(t) + implicit none + + class(list_element), intent(in)::self + character(len=:), pointer::t + + integer::i, text_length + + t => null() + if(.not. associated(self%text)) then + return + end if + + text_length = 0 + do i = 1, size(self%text) + text_length = text_length + & + len_trim(self%text(i)) + & + len(new_line(' ')) + 2 + end do + + allocate(character(len=text_length) :: t) + t = " " + + text_length = 1 + do i = 1, size(self%text) + if(i < size(self%text)) then + t(text_length:len(t)) = "* "//trim(self%text(i))//new_line(' ') + text_length = text_length + len(new_line(' ')) + len_trim(self%text(i))+2 + else + t(text_length:len(t)) = "* "//trim(self%text(i)) + end if + end do + + end function list_element_to_text + +end module fpoint_pptxml \ No newline at end of file diff --git a/pptxzip.f90 b/pptxzip.f90 new file mode 100644 index 0000000..d526125 --- /dev/null +++ b/pptxzip.f90 @@ -0,0 +1,204 @@ +! Copyright (c) 2022 Approximatrix, LLC +! +! Permission is hereby granted, free of charge, to any person obtaining a copy +! of this software and associated documentation files (the "Software"), to deal +! in the Software without restriction, including without limitation the rights +! to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +! copies of the Software, and to permit persons to whom the Software is +! furnished to do so, subject to the following conditions: +! +! The above copyright notice and this permission notice shall be included in +! all copies or substantial portions of the Software. +! +! The Software shall be used for Good, not Evil. +! +! THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +! IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +! FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +! AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +! LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +! OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +! SOFTWARE. + +module fpoint_pptxzip +use fpoint_pptxml +implicit none + + private + public::pptxtracted + + type :: pptxtracted + + character(len=2048)::directory + character(len=2048), dimension(:), pointer::slide_paths + character(len=2048), dimension(:), pointer::note_paths + + type(slide), dimension(:), pointer::slides + + contains + + procedure :: open => pptx_open + procedure :: close => pptx_close + procedure :: parse => pptx_parse + procedure :: to_text => pptx_to_text + procedure :: slide_has_notes + procedure :: slide_count + + end type pptxtracted + +contains + + function slide_has_notes(self, i) + implicit none + + class(pptxtracted), intent(in)::self + integer, intent(in)::i + logical::slide_has_notes + + if(i > size(self%note_paths) .or. i < 1) then + slide_has_notes = .false. + else + slide_has_notes = (len_trim(self%note_paths(i)) > 0) + end if + + end function slide_has_notes + + function slide_count(self) + implicit none + + class(pptxtracted), intent(in)::self + integer::slide_count + + if(associated(self%slide_paths)) then + slide_count = size(self%slide_paths) + else + slide_count = 0 + end if + + end function slide_count + + function pptx_open(self, filename) result(res) + use fpoint_fsutil + use m_unzip, only: unzip + implicit none + + logical::res + class(pptxtracted), intent(out)::self + character(len=*), intent(in)::filename + integer::i, slidecount + character(len=2048)::slidedir, slidecheck, notesdir + character(len=4)::numtext + logical::existence + + call get_temporary_directory(self%directory) + i = index(filename, pathsep, back=.true.) + 1 + call append_to_path(self%directory, filename(i:len_trim(filename))//".extracted") + call make_directory(trim(self%directory)) + + ! Extract the file into said directory + res = unzip(filename, trim(self%directory)) + + ! Count slides + slidedir = self%directory + call append_to_path(slidedir, 'ppt') + call append_to_path(slidedir, 'slides') + do i = 1, 512 + slidecheck = slidedir + write(numtext, '(I4)') i + call append_to_path(slidecheck, "slide"//trim(adjustl(numtext))//".xml") + inquire(file=slidecheck, exist=existence) + if(.not. existence) then + exit + end if + end do + slidecount = i - 1 + + allocate(self%slide_paths(slidecount)) + self%slide_paths = slidedir + do i = 1, slidecount + write(numtext, '(I4)') i + call append_to_path(self%slide_paths(i), "slide"//trim(adjustl(numtext))//".xml") + end do + + ! Now check for notes + notesdir = self%directory + call append_to_path(notesdir, 'ppt') + call append_to_path(notesdir, 'notesSlides') + allocate(self%note_paths(slidecount)) + self%note_paths = notesdir + do i = 1, slidecount + write(numtext, '(I4)') i + call append_to_path(self%note_paths(i), "notesSlide"//trim(adjustl(numtext))//".xml") + inquire(file=self%note_paths(i), exist=existence) + if(.not. existence) then + self%note_paths(i) = " " + end if + end do + + ! These should only be explicitly parsed + self%slides => null() + + end function pptx_open + + subroutine pptx_close(self) + implicit none + + class(pptxtracted), intent(inout)::self + + ! Delete directory tree at self%directory + + deallocate(self%slide_paths) + self%slide_paths => null() + + deallocate(self%note_paths) + self%note_paths => null() + + self%directory = " " + + end subroutine pptx_close + + subroutine pptx_parse(self) + use fpoint_pptxml + implicit none + + class(pptxtracted), intent(inout)::self + integer::i + allocate(self%slides(self%slide_count())) + + do i = 1, self%slide_count() + call self%slides(i)%load(self%slide_paths(i)) + end do + + end subroutine pptx_parse + + function pptx_to_text(self) result(t) + use fpoint_pptxml + implicit none + + class(pptxtracted), intent(in)::self + integer::text_length + character(len=:), pointer::t, tmp + integer::i + + t => null() + + if(self%slide_count() > 0 .and. associated(self%slides)) then + text_length = 0 + do i = 1, self%slide_count() + text_length = text_length + self%slides(i)%text_length() + end do + + allocate(character(len=text_length) :: t) + + text_length = 1 + do i = 1, self%slide_count() + tmp => self%slides(i)%to_text() + t(text_length:len(t)) = trim(tmp) + text_length = text_length + len_trim(tmp) + end do + + end if + + end function pptx_to_text + +end module fpoint_pptxzip diff --git a/winsupport.c b/winsupport.c new file mode 100644 index 0000000..7479b8a --- /dev/null +++ b/winsupport.c @@ -0,0 +1,30 @@ +/* Copyright (c) 2022 Approximatrix, LLC + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * The Software shall be used for Good, not Evil. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include + +/* I can't even believe this is necessary on win32 32-bit... */ +void make_directory_on_windows(char *cdir) +{ + CreateDirectory(cdir, NULL); +} -- cgit v1.2.3