#!/usr/bin/perl
#--------------------------------------------------------------------
#-- get_tweet_and_tweetee2.pl
#-- =========================
#--
#-- Author: TheSuggmeister
#--
#-- Created : Oct/Nov/Dec 2009.
#--
#-- Caveat: This script is provided as is. I know it's messy, but it works (or at least it did).
#--         If you enhance it, re-write it etc, please feel free to email or tweet me.
#--
#-- Description : -Called from maltego
#--               -To call direct, use this format (perl get_tweet_and_tweetee2.pl arg1 arg2 arg3)
#--               -Takes 3 args as input; only the second arg is used for the lookup
#--                (it will be in the format uid=username#.....)
#--               -Extract text between uid= and # which gives you a twitter user id
#--               -Use twitter username to format a GET request using LWP to grab all tweets written by the user
#--                over a period of time.
#--               -Scan the response for the text between <text> and </text>, i.e. the tweets
#--               -Extract usernames from tweets: an @ followed by letters, digits or underscores
#--               -If we've not already done so, get the details for that user so we can obtain their
#--                real twitter name.
#--               -write output to maltego
#--
#--------------------------------------------------------------------
#-- INPUT TYPE = AffiliationTwitter
#--------------------------------------------------------------------
use strict;
use warnings;

#--------------------------------------------------------------------
#-- NOTE(review): Encode is loaded but never called in this script.
#-- Whether the fetched text needs explicit decoding/encoding depends
#-- on what LWP::Simple::get returns in the installed version - confirm.
#--------------------------------------------------------------------
use Encode;

#--------------------------------------------------------------------
#-- LWP stuff
#-- (The HTML::* modules are not used below; they are kept so the
#--  script's dependency list is unchanged. HTML::LinkExtor was
#--  previously spelled HTML::linkExtor, which cannot be located on a
#--  case-sensitive filesystem.)
#--------------------------------------------------------------------
use HTML::Parse;
use HTML::FormatText;
use HTML::LinkExtor;
use LWP::Simple qw($ua get);

#--------------------------------------------------------------------
#-- constants
#--------------------------------------------------------------------
use constant TWITTER_BASE => 'http://twitter.com';

# count  - number of statuses to retrieve. May not be greater than 200.
# max_id - upper bound on status id; the earlier gettweetee variants used
#          no max_id (gettweetee0) and 5014583138, roughly 20th Oct
#          (gettweetee1).
use constant TIMELINE_COUNT  => 200;
use constant TIMELINE_MAX_ID => '4857545179';

#--------------------------------------------------------------------
#-- inits
#--------------------------------------------------------------------
# The three arguments handed over by Maltego. Missing arguments become
# empty strings so the debug messages at the end never interpolate undef.
my ($entityValue, $entityValue2a, $entityValue3) =
    map { defined $_ ? $_ : '' } @ARGV[0 .. 2];

my $entityValue2   = "FAIL";   # twitter user id; stays "FAIL" when no uid= is present
my $have_uid       = 0;        # true once a uid=...# field has been seen
my %seen_name      = ();       # lower-cased screen names we already looked up
my $twitmatchcnt   = 0;        # mentions skipped because the user was already seen
my $notwitmatchcnt = 0;        # mentions that were new (one lookup each)
my $kids           = 0;        # number of child entities written
my @extra_messages = ();       # diagnostics added to the UI messages

#---------------------------------------------
#-- get twitter user id to start search with
#-- (when uid= occurs more than once, the last occurrence wins)
#---------------------------------------------
while ($entityValue2a =~ m/uid=(.*?)#/g) {
    $entityValue2 = $1;
    $have_uid     = 1;
}

#------------------------------------------
#-- LWP set your proxy here if you have one
#------------------------------------------
#$ua->proxy('http', 'http://proxy.example:8080');

#----------------------------------
#-- LWP go grab the timeline
#----------------------------------
my $page = '';
if ($have_uid && length $entityValue2) {
    my $url = TWITTER_BASE
            . '/statuses/user_timeline.xml?count=' . TIMELINE_COUNT
            . '&max_id=' . TIMELINE_MAX_ID
            . '&id=' . url_escape($entityValue2);
    my $fetched = get($url);
    if (defined $fetched) {
        $page = $fetched;
    }
    else {
        # get() returns undef on any failure; report it instead of parsing undef.
        push @extra_messages, " could not fetch timeline for "
            . xml_escape($entityValue2) . " from get_tweet_and_tweetee2.pl";
    }
}
else {
    # Without a uid there is nobody to look up, so no request is made.
    push @extra_messages,
        " no uid found in second argument from get_tweet_and_tweetee2.pl";
}

#----------------------------------
#-- FIRST OUTPUT TO MALTEGO
#--
#-- NOTE(review): the element and field names printed by this script are
#-- assumed from the Maltego local-transform response format; only the
#-- "uid" field name is confirmed by the input format handled above.
#-- Confirm them against the Maltego version this transform runs under.
#----------------------------------
print STDOUT "<MaltegoMessage>\n";
print STDOUT "<MaltegoTransformResponseMessage>\n";
print STDOUT "<Entities>\n";

#--------------------------------------------------------------------
#-- walk every <text>...</text> element, i.e. the tweets
#-- (/s lets a tweet that contains newlines match as well)
#--------------------------------------------------------------------
while ($page =~ m{<text>(.*?)</text>}gs) {
    my $text = $1;

    #----------------------------------
    #-- Grab @'s
    #--
    #-- A mention is an @ that is not glued to a preceding word character
    #-- (so e-mail addresses are not mistaken for mentions), followed by
    #-- letters, digits or underscores. A mention at the very end of the
    #-- tweet is matched too.
    #----------------------------------
    while ($text =~ m/(?<![A-Za-z0-9_])\@([A-Za-z0-9_]+)/g) {
        my $screen_name = $1;

        # Our twitter api rate limit is precious, so every user is looked up
        # only once. Screen names are case-insensitive, hence the lc key.
        if ($seen_name{ lc $screen_name }++) {
            $twitmatchcnt++;
            next;
        }

        # Note!!!! This can max out your Twitter Rate Limit -
        # so either throttle it to sleep for 30 seconds between calls or
        # otherwise limit it. The rate limit = 150 calls per hour.
        my $real_name = fetch_real_name($screen_name);

        print_entity($screen_name, $real_name);

        $kids++;
        $notwitmatchcnt++;
    }
}

#----------------------------------
#-- CLOSE OUTPUT TO MALTEGO
#----------------------------------
my @ui_messages = (
    " twitmatchcnt = $twitmatchcnt from get_tweet_and_tweetee2.pl",
    " notwitmatchcnt = $notwitmatchcnt from get_tweet_and_tweetee2.pl",
    " entityvalue = " . xml_escape($entityValue) . " from get_tweet_and_tweetee2.pl",
    " entityvalue2a = " . xml_escape($entityValue2a) . " from get_tweet_and_tweetee2.pl",
    " entityvalue2 = " . xml_escape($entityValue2) . " from get_tweet_and_tweetee2.pl",
    " entityvalue3 = " . xml_escape($entityValue3) . " from get_tweet_and_tweetee2.pl",
    " Results got $kids children for " . xml_escape($entityValue2)
        . " from get_tweet_and_tweete2.pl",
    @extra_messages,
);

print STDOUT "</Entities>\n";
print STDOUT "<UIMessages>\n";
foreach my $message (@ui_messages) {
    print STDOUT "<UIMessage MessageType=\"Inform\">" . $message . "</UIMessage>\n";
}
print STDOUT "</UIMessages>\n";
print STDOUT "</MaltegoTransformResponseMessage>\n";
print STDOUT "</MaltegoMessage>\n";

exit;

#--------------------------------------------------------------------
#-- fetch_real_name($screen_name)
#--
#-- Fetches users/show/<screen_name>.xml and returns the text of the
#-- last <name> element found in it. Returns "blank" when the request
#-- fails or the response has no <name> element.
#-- The text is returned exactly as it appears in the response (it is
#-- not XML-unescaped).
#--------------------------------------------------------------------
sub fetch_real_name {
    my ($screen_name) = @_;

    my $real_name = "blank";
    my $profile   = get(TWITTER_BASE . '/users/show/' . $screen_name . '.xml');
    return $real_name unless defined $profile;

    while ($profile =~ m{<name>(.*?)</name>}gs) {
        $real_name = $1;
    }
    return $real_name;
}

#--------------------------------------------------------------------
#-- print_entity($screen_name, $real_name)
#--
#-- Writes one AffiliationTwitter entity to STDOUT.
#-- $real_name is printed as-is: it was cut out of an XML response
#-- without unescaping, so escaping it again would double-escape it.
#-- $screen_name only ever contains letters, digits and underscores.
#--
#-- NOTE(review): field names other than "uid" are assumptions - confirm.
#--------------------------------------------------------------------
sub print_entity {
    my ($screen_name, $real_name) = @_;

    print STDOUT " <Entity Type=\"AffiliationTwitter\">";
    print STDOUT "<Value>" . $real_name . "</Value>\n";
    print STDOUT "<Weight>100</Weight>\n";
    print STDOUT "<AdditionalFields>\n";
    print STDOUT "<Field Name=\"uid\">" . $screen_name . "</Field>\n";
    print STDOUT "<Field Name=\"network\">Twitter</Field>\n";
    print STDOUT "<Field Name=\"name\">" . $screen_name . "</Field>\n";
    print STDOUT "<Field Name=\"profile_url\">http://twitter.com/" . $screen_name . "</Field>\n";
    print STDOUT "</AdditionalFields>\n";
    print STDOUT "</Entity>\n";
    return;
}

#--------------------------------------------------------------------
#-- xml_escape($text)
#--
#-- Returns $text with &, < and > replaced by their XML entities, for
#-- values that come straight from the command line.
#--------------------------------------------------------------------
sub xml_escape {
    my ($text) = @_;
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    return $text;
}

#--------------------------------------------------------------------
#-- url_escape($text)
#--
#-- Returns $text with every character outside A-Z a-z 0-9 _ . ~ -
#-- percent-encoded, so a uid cannot add or alter query parameters.
#-- Treats $text as bytes, which is how command-line arguments arrive
#-- by default.
#--------------------------------------------------------------------
sub url_escape {
    my ($text) = @_;
    $text =~ s/([^A-Za-z0-9_.~-])/sprintf('%%%02X', ord $1)/ge;
    return $text;
}