cgi-bin/DW/Hooks/EmbedWhitelist.pm
author fu
Thu Feb 23 02:21:54 2012 +0800
changeset 4315 34b3c4ba3afb
parent 3716 e5aed810efb8
permissions -rw-r--r--
http://bugs.dwscoalition.org/show_bug.cgi?id=3859

Tweak for grammar / clarity / wording.

Patch by fu.
fu@3320
     1
#!/usr/bin/perl
fu@3320
     2
#
fu@3320
     3
# This code was based on code originally created by the LiveJournal project
fu@3320
     4
# owned and operated by Live Journal, Inc. The code has been modified and expanded
fu@3320
     5
# by Dreamwidth Studios, LLC. These files were originally licensed under
fu@3320
     6
# the terms of the license supplied by Live Journal, Inc, which can
fu@3320
     7
# currently be found at:
fu@3320
     8
#
fu@3320
     9
# http://code.livejournal.org/trac/livejournal/browser/trunk/LICENSE-LiveJournal.txt
fu@3320
    10
#
fu@3320
    11
# In accordance with the original license, this code and all its
fu@3320
    12
# modifications are provided under the GNU General Public License.
fu@3320
    13
# A copy of that license can be found in the LICENSE file included as
fu@3320
    14
# part of this distribution.
fu@3320
    15
#
fu@3320
    16
#
fu@3320
    17
# DW::Hooks::EmbedWhitelist
fu@3320
    18
#
fu@3320
    19
# Keep a whitelist of sites which we trust for certain kinds of embeds
fu@3320
    20
#
fu@3320
    21
# Authors:
fu@3320
    22
#      Afuna <coder.dw@afunamatata.com>
fu@3320
    23
#
fu@3320
    24
# Copyright (c) 2011 by Dreamwidth Studios, LLC.
fu@3320
    25
fu@3320
    26
package DW::Hooks::EmbedWhitelist;
fu@3320
    27
fu@3320
    28
use strict;
fu@3320
    29
use LJ::Hooks;
fu@3700
    30
use URI;
fu@3700
    31
fu@3716
    32
# for internal use only
# This is used when a site may offer embeds from multiple subdomains,
# e.g., www, www1, etc.
#
# Arguments:
#   $_[0] - the domain we want; treated as a literal string (regex
#           metacharacters in it are quoted)
#   $_[1] - the host taken from the embed URI
#
# Returns true if the host is exactly the wanted domain, or something ending
# in ".<wanted domain>"; false otherwise, including when either argument is
# undefined.
sub match_subdomain {
    my ( $want_domain, $domain_from_uri ) = @_;

    return 0 unless defined $want_domain && defined $domain_from_uri;

    # Anchor with \A and \z rather than ^ and $: "$" also matches just before
    # a trailing newline, which would let a host like "youtube.com\n" through.
    return $domain_from_uri =~ /\A(?:[\w.-]*\.)?\Q$want_domain\E\z/;
}
fu@3700
    41
fu@3700
    42
# for internal use only
# Checks that a pattern matches the *entire* path, not just part of it.
#
# Arguments:
#   $_[0] - the pattern the path must match (a qr// object or a pattern string)
#   $_[1] - the path taken from the embed URI
#
# Returns true if the whole path matches the pattern; false otherwise,
# including when either argument is undefined.
sub match_full_path {
    my ( $want_path, $path_from_uri ) = @_;

    return 0 unless defined $want_path && defined $path_from_uri;

    # \A / \z anchor to the real start and end of the string ("$" would also
    # accept a trailing newline). The (?:...) group keeps the anchors applied
    # to the whole pattern even if it contains a top-level alternation.
    return $path_from_uri =~ /\A(?:$want_path)\z/;
}
fu@3320
    48
fu@3716
    49
# Exact-host whitelist: maps a hostname (compared verbatim against the host
# of the embed URI) to the regex that the URI path must match for the embed
# to be allowed. Hosts that need subdomain matching are not listed here.
my %host_path_match = (
    "bandcamp.com" => qr!^/EmbeddedPlayer/!,
    "blip.tv" => qr!^/play/!,

    "www.dailymotion.com" => qr!^/embed/video/!,
    "dotsub.com" => qr!^/media/!,

    "maps.google.com" => qr!^/maps!,
    "ext.nicovideo.jp" => qr!^/thumb/!,

    # best guess; language parameter before /player may vary,
    # so this pattern is deliberately not anchored at the start of the path
    "www.sbs.com.au" => qr!/player/embed/!,
    "www.scribd.com" => qr!^/embeds/!,
    "www.slideshare.net" => qr!^/slideshow/embed_code/!,

    # numeric video id only, nothing after it
    "player.vimeo.com" => qr!^/video/\d+$!,
);
fu@3716
    65
fu@3320
    66
# Handler registered under the 'allow_iframe_embeds' hook name.
#
# Arguments: ( $embed_url, %opts ) -- %opts is accepted but not used here.
#
# Returns 1 if the URL is an http/https URL whose host and path match one of
# the whitelisted embed sources below; returns 0 otherwise.
LJ::Hooks::register_hook( 'allow_iframe_embeds', sub {
    my ( $embed_url, %opts ) = @_;

    return 0 unless $embed_url;

    my $parsed_uri = URI->new( $embed_url );

    # relative URLs have no scheme at all
    my $uri_scheme = $parsed_uri->scheme;
    return 0 unless defined $uri_scheme;
    return 0 unless $uri_scheme eq "http" || $uri_scheme eq "https";

    # the host is undefined for URLs without an authority part (e.g. "http:foo")
    my $uri_host = $parsed_uri->host;
    return 0 unless defined $uri_host && length $uri_host;

    # hostnames are case-insensitive, but the whitelist is written in lowercase
    $uri_host = lc $uri_host;

    my $uri_path = $parsed_uri->path;   # not including query

    # sites that embed from one fixed hostname
    my $path_regex = $host_path_match{$uri_host};
    return 1 if $path_regex && ( $uri_path =~ $path_regex );

    ## YouTube (http://apiblog.youtube.com/2010/07/new-way-to-embed-youtube-videos.html)
    if ( match_subdomain( "youtube.com", $uri_host ) || match_subdomain( "youtube-nocookie.com", $uri_host ) ) {
        return 1 if match_full_path( qr!/embed/[-_a-zA-Z0-9]{11,}!, $uri_path );
    }

    ## Wikimedia Commons: file pages, only when the embedded player is requested
    if ( $uri_host eq "commons.wikimedia.org" ) {
        # the query is undefined when the URL has no "?..." part
        my $uri_query = $parsed_uri->query;
        $uri_query = "" unless defined $uri_query;

        return 1 if $uri_path =~ m!^/wiki/File:! && $uri_query =~ m/embedplayer=yes/;
    }

    return 0;

} );
fu@3320
    94
fu@3320
    95
1;