root/trunk/pici-server/ADMIN/picidae.py

Revision 4, 14.8 kB (checked in by mj, 12 years ago)

setup process, added flexibility

  • Property svn:executable set to *
Line 
1 #!/usr/bin/env python
2
3 # picidae.py - makes screenshots of webpages
4 # and analyzes the webpage structure and writes image-maps of the links
5 # as well as forms that are placed on the exact position of the old form.
6 # It is a part of the art project www.picidae.net
7 # http://www.picidae.net
8
9 #
10 # This script is based on webkit2png from Paul Hammond.
11 # It was extended by picidae.net
12 #
13 # This program is free software; you can redistribute it and/or
14 # modify it under the terms of the GNU General Public License
15 # as published by the Free Software Foundation; either version 2
16 # of the License, or (at your option) any later version.
17 #
18 # This program is distributed in the hope that it will be useful,
19 # but WITHOUT ANY WARRANTY; without even the implied warranty of
20 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
21 # GNU General Public License for more details.
22 #
23 # You should have received a copy of the GNU General Public License
24 # along with this program; if not, write to the Free Software
25 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
26
27
28 __version__ = "1.0"
29
30
31 import sys
32
33
34 #print "hello ... "
35
36
# The Cocoa bridge modules are mandatory; without them nothing below can run.
try:
  import Foundation
  import WebKit
  import AppKit
  import objc
  import urllib
except ImportError:
  print("Cannot find pyobjc library files.  Are you sure it is installed?")
  # Exit with a non-zero status so a calling script can tell that no
  # screenshot was produced (a bare sys.exit() reports success).
  sys.exit(1)
46
47
48
49
50
51 #try:
52 #  from optparse import OptionParser
53 #except ImportError:
54 #  print "OptionParser not imported"
55 #  sys.exit()
56
57 from optparse import OptionParser
58
59
class AppDelegate (Foundation.NSObject):
    """Application delegate whose only job is to start the first page load."""

    def applicationDidFinishLaunching_(self, aNotification):
        # The notification's object is asked for its windows; the content
        # view of the first window is the web view, and that view's
        # frame-load delegate is the object that knows which URL to fetch.
        application = aNotification.object()
        firstWindow = application.windows()[0]
        webview = firstWindow.contentView()
        loader = webview.frameLoadDelegate()
        loader.getURL(webview)
65
66
class WebkitLoad (Foundation.NSObject, WebKit.protocols.WebFrameLoadDelegate):
    """Frame-load delegate that captures every requested URL in turn.

    Two attributes are assigned by main() before the run loop starts:

      options -- parsed command-line values (sizes, naming, which images
                 to write)
      urls    -- list of URLs still to load; while the first entry is '-'
                 URLs are read from stdin instead, one per line

    For each page whose main frame finishes loading, the PNG images
    selected by the options are written together with <filename>.xml
    (an image map of the links) and <filename>.form.xml (a description of
    the forms and their input fields).

    Helper methods are named without underscores, like the existing
    helpers (makeFilename, getURL, ...), because PyObjC maps underscores
    in method names of Objective-C subclasses to selector colons.
    """

    # NOTE(review): both failure callbacks advance to the next URL no matter
    # which frame failed, so a failing subframe presumably abandons the page
    # being captured -- confirm whether a main-frame check is wanted here.
    def webView_didFailLoadWithError_forFrame_(self,webview,error,frame):
        """Report a failed load and move on to the next URL."""
        print(" ... something went wrong 1")
        self.getURL(webview)

    def webView_didFailProvisionalLoadWithError_forFrame_(self,webview,error,frame):
        """Report a failed provisional load and move on to the next URL."""
        print(" ... something went wrong 2")
        self.getURL(webview)

    def makeFilename(self,URL,options):
        """Return the absolute output path, without extension, for URL.

        The base name is options.filename if given, else the md5 hex digest
        of the URL (options.md5), else the URL with all non-word characters
        and a leading "http" removed.  With options.datestamp the name is
        prefixed with "YYYYMMDD-".  The result is placed in options.dir.
        """
        import os
        if options.filename:
            filename = options.filename
        elif options.md5:
            try:
                try:
                    # hashlib replaces the deprecated md5 module
                    from hashlib import md5 as newmd5
                except ImportError:
                    from md5 import new as newmd5
            except ImportError:
                print("--md5 requires python md5 library")
                AppKit.NSApplication.sharedApplication().terminate_(None)
                # never continue without a digest function
                raise
            try:
                # hash a well-defined byte sequence, also for non-ASCII URLs
                data = URL.encode("utf-8")
            except UnicodeError:
                # already a byte string that is not plain ASCII
                data = URL
            filename = newmd5(data).hexdigest()
        else:
            import re
            filename = re.sub(r'\W', '', URL)
            filename = re.sub(r'^http', '', filename)
        if options.datestamp:
            import time
            filename = time.strftime("%Y%m%d") + "-" + filename
        outdir = os.path.abspath(os.path.expanduser(options.dir))
        return os.path.join(outdir, filename)

    def saveImages(self,bitmapdata,filename,options):
        """Write the PNG files selected by options.

        bitmapdata -- bitmap of the rendered page
        filename   -- output path without extension; ".png", "-thumb.png"
                      and "-clipped.png" are appended as needed
        options    -- reads fullsize, thumb, clipped, scale, clipwidth and
                      clipheight
        """
        png = AppKit.NSPNGFileType

        # save the fullsize png
        if options.fullsize:
            bitmapdata.representationUsingType_properties_(
                    png,None
                ).writeToFile_atomically_(filename + ".png",objc.YES)

        if not (options.thumb or options.clipped):
            return

        # work out how big the thumbnail is
        thumbWidth = bitmapdata.pixelsWide() * options.scale
        thumbHeight = bitmapdata.pixelsHigh() * options.scale

        # draw the scaled page into a scratch image and grab both the whole
        # thumbnail and the clipped region from it
        scratch = AppKit.NSImage.alloc().initWithSize_(
                Foundation.NSMakeSize(thumbWidth,thumbHeight))
        scratch.lockFocus()
        AppKit.NSGraphicsContext.currentContext().setImageInterpolation_(
                AppKit.NSImageInterpolationHigh)
        thumbRect = Foundation.NSMakeRect(0.0, 0.0, thumbWidth, thumbHeight)
        # the clip rectangle is anchored at y = thumbHeight - clipheight
        clipRect = Foundation.NSMakeRect(0.0,
                thumbHeight-options.clipheight,
                options.clipwidth, options.clipheight)
        bitmapdata.drawInRect_(thumbRect)
        thumbOutput = AppKit.NSBitmapImageRep.alloc().initWithFocusedViewRect_(thumbRect)
        clipOutput = AppKit.NSBitmapImageRep.alloc().initWithFocusedViewRect_(clipRect)
        scratch.unlockFocus()

        # save the thumbnails as pngs
        if options.thumb:
            thumbOutput.representationUsingType_properties_(
                    png,None
                ).writeToFile_atomically_(filename + "-thumb.png",objc.YES)
        if options.clipped:
            clipOutput.representationUsingType_properties_(
                    png,None
                ).writeToFile_atomically_(filename + "-clipped.png",objc.YES)

    def getURL(self,webview):
        """Start loading the next URL, or terminate when none is left.

        While the first entry of self.urls is '-', URLs are read from stdin
        one line at a time and an empty line (or end of input) terminates
        the application.  A URL for which no provisional data source is
        created is reported with "<nosuccess  />" and skipped.  This is a
        loop rather than recursion so that a long run of unusable URLs
        cannot exhaust the recursion limit.
        """
        while True:
            if not self.urls:
                AppKit.NSApplication.sharedApplication().terminate_(None)
                return
            if self.urls[0] == '-':
                url = sys.stdin.readline().rstrip()
                if not url:
                    AppKit.NSApplication.sharedApplication().terminate_(None)
                    return
            else:
                url = self.urls.pop(0)
            self.resetWebview(webview)
            webview.mainFrame().loadRequest_(
                    Foundation.NSURLRequest.requestWithURL_(
                        Foundation.NSURL.URLWithString_(url)))
            if webview.mainFrame().provisionalDataSource():
                # the load is under way; the delegate callbacks take over
                return
            print("<nosuccess  />")

    def resetWebview(self,webview):
        """Resize the window and web view back to the initial size."""
        width = self.options.initWidth
        height = self.options.initHeight
        webview.window().setContentSize_((width,height))
        webview.setFrame_(Foundation.NSMakeRect(0,0,width,height))

    def resizeWebview(self,view):
        """Make the window content and the view frame match the view's bounds."""
        view.window().display()
        view.window().setContentSize_(view.bounds().size)
        view.setFrame_(view.bounds())

    def captureView(self,view):
        """Return an NSBitmapImageRep holding the full bounds of view."""
        view.lockFocus()
        # keep the object returned by init...; it is not guaranteed to be the
        # one alloc() handed out
        bitmapdata = AppKit.NSBitmapImageRep.alloc().initWithFocusedViewRect_(
                view.bounds())
        view.unlockFocus()
        return bitmapdata

    def attrText(self, value):
        """Return value as text that is safe inside a double-quoted XML attribute."""
        from xml.sax.saxutils import escape
        return escape("%s" % (value,), {'"': "&quot;"})

    def cdataText(self, value):
        """Return value as text that cannot end the enclosing CDATA section."""
        return ("%s" % (value,)).replace("]]>", "]]]]><![CDATA[>")

    def writeXML(self, path, text):
        """Write text to path as UTF-8, closing the file even on error."""
        import codecs
        out = codecs.open(path, 'w', 'utf-8')
        try:
            out.write(text)
        finally:
            out.close()

    def linkMapXML(self, document):
        """Return the <map> element describing every anchor in document."""
        anchors = document.getElementsByTagName_('A')
        parts = ["<map name=\"map\">\r"]
        for i in range(anchors.length()):
            anchor = anchors.item_(i)
            href = anchor.valueForKey_('href')
            box = anchor.boundingBox()
            parts.append(
                "<area shape=\"rect\" coords=\"%i,%i,%i,%i\" alt=\"\"><![CDATA[%s]]></area>\r"
                % (Foundation.NSMinX(box), Foundation.NSMinY(box),
                   Foundation.NSMaxX(box), Foundation.NSMaxY(box),
                   self.cdataText(href)))
        parts.append("</map>")
        return "".join(parts)

    def inputXML(self, field):
        """Return the <input> element describing one form field."""
        fieldtype = field.valueForKey_('type')
        name = field.valueForKey_('name')
        size = field.valueForKey_('size')
        checked = field.valueForKey_('checked')

        out = "\t<input "
        if fieldtype:
            out += "type=\"%s\" " % self.attrText(fieldtype)
        if name:
            out += "name=\"%s\" " % self.attrText(name)
        if size:
            out += "size=\"%s\" " % self.attrText(size)
        if fieldtype and fieldtype != "hidden":
            box = field.boundingBox()
            xmin = Foundation.NSMinX(box)
            ymin = Foundation.NSMinY(box)
            width = Foundation.NSMaxX(box) - xmin
            height = Foundation.NSMaxY(box) - ymin
            if fieldtype in ("radio", "checkbox"):
                # these two control types are positioned 3 units up and left
                xmin -= 3
                ymin -= 3
            out += "style=\"position:absolute;top:%i;left:%i;width:%i;height:%i;\" " % (ymin, xmin, width, height)
        if checked:
            out += "checked=\"%s\" " % self.attrText(checked)
        # The field's current value is not exported: the CDATA body stays
        # empty, exactly as before.
        out += "><![CDATA[" + "]]></input>\r"
        return out

    def formsXML(self, document, pageURL):
        """Return the <forms> element describing every form in document."""
        parts = ["<forms>\r",
                 "<page><![CDATA[", self.cdataText(pageURL), "]]></page>\r"]
        forms = document.getElementsByTagName_('form')
        for i in range(forms.length()):
            form = forms.item_(i)
            action = form.valueForKey_('action')
            method = form.valueForKey_('method')
            parts.append(
                "<form method=\"%s\" ><action><![CDATA[%s]]></action>\r"
                % (self.attrText(method), self.cdataText(action)))
            fields = form.getElementsByTagName_('input')
            for j in range(fields.length()):
                parts.append(self.inputXML(fields.item_(j)))
            parts.append("</form>\r")
        parts.append("</forms>")
        return "".join(parts)

    # what happens when the page has finished loading
    def webView_didFinishLoadForFrame_(self,webview,frame):
        """Capture the finished page, describe its links and forms, go on.

        Writes the images chosen by self.options, prints the final page URL
        between <page> and </page>, writes <filename>.xml and
        <filename>.form.xml, then starts loading the next URL.
        """
        # don't care about subframes
        if not (frame == webview.mainFrame()):
            return

        view = frame.frameView().documentView()
        self.resizeWebview(view)

        # the file name is derived from the URL that was originally requested
        URL = frame.dataSource().initialRequest().URL().absoluteString()
        filename = self.makeFilename(URL, self.options)

        bitmapdata = self.captureView(view)
        self.saveImages(bitmapdata,filename,self.options)

        # report the URL of the data source's current request
        pageURL = frame.dataSource().request().URL().absoluteString()
        print("<page>")
        print(pageURL)
        print("</page>")

        document = frame.DOMDocument()
        self.writeXML(filename + '.xml', self.linkMapXML(document))
        self.writeXML(filename + '.form.xml', self.formsXML(document, pageURL))

        self.getURL(webview)
292            
293             #trying to give back the real url
294
295
def main():
    """Parse the command line, build an off-screen WebView and run the app.

    Prints the help text and returns when no URL is given.  Invalid option
    combinations (--filename with anything but exactly one real URL, or a
    scale that is not positive) are reported through the option parser,
    which prints a usage error and exits with status 2.  Otherwise the
    Cocoa run loop is started.
    """

    # parse the command line
    usage = """%prog [options] [http://example.net/ ...]

examples:
%prog http://google.com/            # screengrab google
%prog -W 1000 -H 1000 http://google.com/ # bigger screengrab of google
%prog -T http://google.com/         # just the thumbnail screengrab
%prog -TF http://google.com/        # just thumbnail and fullsize grab
%prog -o foo http://google.com/     # save images as "foo-thumb.png" etc
%prog -                             # screengrab urls from stdin"""

    cmdparser = OptionParser(usage, version=("webkit2png "+__version__))
    # TODO: add quiet/verbose options
    cmdparser.add_option("-W", "--width",type="float",default=800.0,
       help="initial (and minimum) width of browser (default: 800)")
    cmdparser.add_option("-H", "--height",type="float",default=600.0,
       help="initial (and minimum) height of browser (default: 600)")
    cmdparser.add_option("--clipwidth",type="float",default=200.0,
       help="width of clipped thumbnail (default: 200)",
       metavar="WIDTH")
    cmdparser.add_option("--clipheight",type="float",default=150.0,
       help="height of clipped thumbnail (default: 150)",
       metavar="HEIGHT")
    cmdparser.add_option("-s", "--scale",type="float",default=0.25,
       help="scale factor for thumbnails (default: 0.25)")
    cmdparser.add_option("-m", "--md5", action="store_true",
       help="use md5 hash for filename (like del.icio.us)")
    cmdparser.add_option("-o", "--filename", type="string",default="",
       metavar="NAME", help="save images as NAME.png,NAME-thumb.png etc")
    cmdparser.add_option("-F", "--fullsize", action="store_true",
       help="only create fullsize screenshot")
    cmdparser.add_option("-T", "--thumb", action="store_true",
       help="only create thumbnail screenshot")
    cmdparser.add_option("-C", "--clipped", action="store_true",
       help="only create clipped thumbnail screenshot")
    cmdparser.add_option("-d", "--datestamp", action="store_true",
       help="include date in filename")
    cmdparser.add_option("-D", "--dir",type="string",default="./",
       help="directory to place images into")
    (options, args) = cmdparser.parse_args()
    if not args:
        cmdparser.print_help()
        return
    # a fixed file name can only belong to a single, explicitly given URL
    if options.filename and (len(args) != 1 or args[0] == "-"):
        cmdparser.error("--filename option requires exactly one url")
    # the scale is used as a divisor and as a size factor below
    if options.scale <= 0:
        cmdparser.error("scale must be greater than zero")
    # make sure we're outputting something
    if not (options.fullsize or options.thumb or options.clipped):
        options.fullsize = True
        options.thumb = True
        options.clipped = True
    # work out the initial size of the browser window
    #  (this might need to be larger so clipped image is right size)
    options.initWidth = max(options.width, options.clipwidth / options.scale)
    options.initHeight = max(options.height, options.clipheight / options.scale)

    app = AppKit.NSApplication.sharedApplication()

    # create an app delegate
    delegate = AppDelegate.alloc().init()
    app.setDelegate_(delegate)

    # create a window far off screen; keep the objects returned by the
    # init... calls rather than the bare alloc() results
    rect = Foundation.NSMakeRect(-16000,-16000,100,100)
    # backing 2 is NSBackingStoreBuffered, defer 0 means "do not defer"
    win = AppKit.NSWindow.alloc().initWithContentRect_styleMask_backing_defer_(
            rect, AppKit.NSBorderlessWindowMask, 2, 0)

    # create a webview object
    webview = WebKit.WebView.alloc().initWithFrame_(rect)
    # turn off scrolling so the content is actually x wide and not x-15
    webview.mainFrame().frameView().setAllowsScrolling_(objc.NO)
    # add the webview to the window
    win.setContentView_(webview)

    # create a LoadDelegate and hand it the work to do
    loaddelegate = WebkitLoad.alloc().init()
    loaddelegate.options = options
    loaddelegate.urls = args
    webview.setFrameLoadDelegate_(loaddelegate)

    app.run()
390
# Run the screenshot tool only when executed as a script, not on import.
if __name__ == '__main__':
    main()
392