#--------------------------------------------------------------------
#-- get_tweet_and_tweetee2.pl
#-- =========================
#--
#-- Author: TheSuggmeister
#--
#-- Created : Oct/Nov/Dec 2009.
#--
#-- Caveat: This script is provided as is. I know it's messy, but it works (or at least it did).
#-- If you enhance it, re-write it etc, please feel free to email or tweet me.
#--
#-- Description : -Called from maltego
#-- -To call directly, use this format (perl get_tweet_and_tweetee2.pl arg1 arg2 arg3)
#-- -Takes 3 args as input; only the second arg is used for the lookup (it will be in the format uid=username#.....)
#-- -Extract text between uid= and # which gives you a twitter user id
#-- -Use twitter username to format a GET request using LWP to grab all tweets written by the user
#-- over a period of time.
#-- -loop through every line gettin' stuff between <text> and </text>, i.e. the tweets
#-- -Extract Usernames from tweets starting with @ and ending in space (\s) or any of the other special chars
#-- -If we've not already done so, get the details for that user so we can obtain their real-twitter name,
#-- id etc.
#-- -write output to maltego
#--
#--
#--------------------------------------------------------------------
#--------------------------------------------------------------------
#-- INPUT TYPE = AffiliationTwitter
#--------------------------------------------------------------------
#!/usr/bin/perl
#--------------------------------------------------------------------
#-- you'll need to decode the UTF-8 output from Twitter
#--------------------------------------------------------------------
use Encode;
#--------------------------------------------------------------------
#-- LWP stuff
#--------------------------------------------------------------------
use HTML::Parse;
use HTML::FormatText;
use HTML::linkExtor;
use LWP::Simple;
use LWP::Simple qw($ua get);
#--------------------------------------------------------------------
#-- inits
#--------------------------------------------------------------------
#
# Command-line arguments handed over by the caller. A missing argument
# becomes an empty string so later matches and prints never see undef.
my $entityValue   = defined $ARGV[0] ? $ARGV[0] : "";  # only echoed in the closing diagnostics
my $entityValue2  = "FAIL";                             # twitter user id; stays "FAIL" when no uid= field is found
my $entityValue2a = defined $ARGV[1] ? $ARGV[1] : "";  # field string that is searched for uid=...
my $entityValue3  = defined $ARGV[2] ? $ARGV[2] : "";  # only echoed in the closing diagnostics

# Working state for the tweet loop further down.
my $text           = "Dave";  # text of the tweet being scanned ("Dave" is a debugging placeholder)
my $name_array_var = "";      # the single @username currently being processed
my @twitname       = ();      # usernames already looked up (no point in fetching them again)
my $twitmatch      = 0;       # flag: current username was already in @twitname
my $notwitmatchcnt = 0;       # number of usernames seen for the first time
my $twitmatchcnt   = 0;       # number of repeat sightings of a known username

# These two have always been package variables; they are now declared
# explicitly (with unchanged visibility) instead of springing into
# existence on first assignment.
our $delim = "\n";            # separator used to split fetched pages into lines
our $kids  = 0;               # number of child entities emitted so far
#---------------------------------------------
#-- get twitter user id to start search with
#---------------------------------------------
# Return the value of the last "uid=" field found in $fields, or undef when
# there is none. A value ends at the next '#' or at the end of the string,
# so a uid that is the final field (no trailing '#') is still picked up.
sub _extract_uid {
    my ($fields) = @_;
    return unless defined $fields;

    my $uid;
    while ($fields =~ m/uid=([^#\n]*)/g) {
        $uid = $1;    # keep overwriting: the last occurrence wins
    }
    return $uid;
}

# Leave $entityValue2 at its initial value when no uid= field is present.
my $found_uid = _extract_uid($entityValue2a);
$entityValue2 = $found_uid if defined $found_uid;
#------------------------------------------
#-- LWP set your proxy here if you have one
#------------------------------------------
#$ua->proxy('http',$proxlst[$proxcnt]);
#----------------------------------------------------------------------------------------------------
#-- LWP url we want to grab
#--
#-- count - Specifies the number of statuses to retrieve. May not be greater than 200.
#-- max_id = 5014583138 roughly 20th oct
#----------------------------------------------------------------------------------------------------
# for gettweetee0
#$url = "http://twitter.com/statuses/user_timeline.xml?count=200&id=" . $entityValue2;
# for gettweetee1
#$url = "http://twitter.com/statuses/user_timeline.xml?count=200&max_id=5014583138&id=" . $entityValue2;
# for gettweetee2
# Timeline request for the extracted user id. count=200 is the maximum the
# comment block above allows; max_id is the hard-coded value that
# distinguishes this "gettweetee2" variant from the commented-out ones.
my $url = "http://twitter.com/statuses/user_timeline.xml?count=200&max_id=4857545179&id=" . $entityValue2;
#----------------------------------
#-- LWP go grab that URL
#----------------------------------
# LWP::Simple's get returns undef when the request fails. Report that on
# STDERR and carry on with an empty page so the rest of the script still
# runs and simply produces no entities.
my $page = get($url);
if (!defined $page) {
    warn "get_tweet_and_tweetee2.pl: could not fetch $url\n";
    $page = "";
}
#----------------------------------
#-- PARSE that page
#----------------------------------
#- load page into @lines array
my @lines = split($delim, $page);
#----------------------------------
#-- FIRST OUTPUT TO MALTEGO
#----------------------------------
# Writes three newline-only lines to STDOUT before any entity output.
# NOTE(review): each literal looks like it once carried an opening XML tag
# of the Maltego response envelope that has since been stripped -- confirm
# against the Maltego local transform output format and restore if so.
print STDOUT "\n";
print STDOUT "\n";
print STDOUT "\n";
#--------------------------------------------------------------------
#-- loop through every line gettin' stuff between <text> and </text>, i.e. the tweets
#--------------------------------------------------------------------
# Return every @mention in $tweet, in order of appearance, without the '@'.
# A mention is '@' followed by letters, digits or underscores. The '@' must
# not directly follow such a character, so "joe@example.com" is not treated
# as a mention. Mentions at the very end of the tweet are found too.
sub _extract_mentions {
    my ($tweet) = @_;
    return () unless defined $tweet;

    my @mentions = $tweet =~ m/(?<![A-Za-z0-9_])\@([A-Za-z0-9_]+)/g;
    return @mentions;
}

# Return the text of the last <name>...</name> element in $user_xml, or
# "blank" when there is none or the document is undefined (failed fetch).
sub _parse_real_name {
    my ($user_xml) = @_;
    my $real_name = "blank";
    return $real_name unless defined $user_xml;

    while ($user_xml =~ m/<name>(.*?)<\/name>/g) {
        $real_name = $1;    # keep overwriting: the last occurrence wins
    }
    return $real_name;
}

#---------------------------------------------
#-- START Parse each tweet returned
#---------------------------------------------
# Users already looked up, keyed by lower-cased screen name (twitter screen
# names are case-insensitive). The twitter api rate limit is precious, so a
# user is never fetched twice.
my %seen_user;

# Match against the re-joined page rather than line by line, so that a tweet
# containing a newline is still seen as one <text>...</text> element.
my $timeline_xml = join("\n", @lines);

while ($timeline_xml =~ m/<text>(.*?)<\/text>/gs) {
    my $tweet = $1;

    #----------------------------------
    #-- START - Grab @'s
    #----------------------------------
    foreach my $screen_name (_extract_mentions($tweet)) {
        my $user_key = lc $screen_name;

        if ($seen_user{$user_key}) {
            $twitmatchcnt++;    # name already exists
            next;
        }
        $seen_user{$user_key} = 1;
        push @twitname, $screen_name;

        #------------------------------------------------------------------------
        # Get real name
        #
        # Note!!!! This can max out your Twitter Rate Limit -
        # so either throttle it to sleep for 30 seconds between calls or
        # otherwise limit it. The rate limit = 150 calls per hour.
        #------------------------------------------------------------------------
        my $profile_xml = get("http://twitter.com/users/show/" . $screen_name . ".xml");
        my $real_name   = _parse_real_name($profile_xml);

        # NOTE(review): the literals below look as if XML markup was stripped
        # from them at some point (empty "" fragments, bare values). They are
        # emitted unchanged, one statement per original line -- confirm
        # against the Maltego local transform output format before relying
        # on this output.
        print STDOUT " ";
        print STDOUT "" . $real_name . "\n";
        print STDOUT "100\n";
        print STDOUT "\n";
        print STDOUT "" . $screen_name . "\n";
        print STDOUT "Twitter\n";
        print STDOUT "" . $screen_name . "\n";
        print STDOUT "http://twitter.com/" . $screen_name . "\n";
        print STDOUT "\n";
        print STDOUT "\n";

        $kids++;              # one more child entity emitted
        $notwitmatchcnt++;    # name did not already exist
    }
    #----------------------------------
    #-- END - Grab @'s
    #----------------------------------
}
#------------------------------------
#-- END - Parse each tweet returned
#------------------------------------
#----------------------------------
#-- CLOSE OUTPUT TO MALTEGO
#----------------------------------
# Closing output: two newline-only lines, seven diagnostic lines reporting
# the counters and the raw arguments, then three more newline-only lines.
# NOTE(review): the newline-only literals and the leading space of each
# diagnostic look like the remains of stripped XML markup -- confirm against
# the Maltego local transform output format and restore if so.
print STDOUT "\n";
print STDOUT "\n";
print STDOUT " twitmatchcnt = $twitmatchcnt from get_tweet_and_tweetee2.pl\n";
print STDOUT " notwitmatchcnt = $notwitmatchcnt from get_tweet_and_tweetee2.pl\n";
print STDOUT " entityvalue = $entityValue from get_tweet_and_tweetee2.pl\n";
print STDOUT " entityvalue2a = $entityValue2a from get_tweet_and_tweetee2.pl\n";
print STDOUT " entityvalue2 = $entityValue2 from get_tweet_and_tweetee2.pl\n";
print STDOUT " entityvalue3 = $entityValue3 from get_tweet_and_tweetee2.pl\n";
# The script name in this message was misspelled ("tweete2"); it now matches
# the other diagnostics.
print STDOUT " Results got $kids children for $entityValue2 from get_tweet_and_tweetee2.pl\n";
print STDOUT "\n";
print STDOUT "\n";
print STDOUT "\n";
exit;