Monday, March 13, 2017

Remove SPAM from gmail automatically 2


One of the annoying things is that spammers learn new tricks.
My luck is that spammers are usually using generators to create loads of spam. 
This means that there are parts in the spam mail that always look similar. The trick is to find the similar parts and then simply filter on them, but to do that carefully enough not to filter out the proper emails. This usually can be done by finding long strings that are the same in the spam messages.

In this new version you can also delete messages in such a way that they immediately disappear. For this you need more advanced google script commands.

In order to get this to work you need to go to the advanced services for your script:


Then choose to enable the gmail api:
 
Then it says you also have to enable the services in the gmail api console:
Then choose Google API Console:
Pick out the GMail API and enable the methods gmail.users.messages.remove or take it simpler and enable all the gmail api methods.


 Then simply test your code by running RemoveTrashSpam.



My complete code so far (until I change it again). I found an error in the previous version so here another update..

Time for a well improved update!! 
Spammers now apparently use spam domains, domains they can get for free or a minor payment. Since these are almost always spam domains I simply block them all using a regex.

20170405: Minor update - added co.jp to the Message-ID blocklist
20170425: Minor update - added to the regex for the Message-ID, some spam comes from 192.3.something so this gets deleted as well.
20170427: Minor update - added .pe and .co domains, fixed some code like a.b which should be a\.b
20170601: Fixed: code try finally to try catch

/**
 * Returns true when `haystack` contains at least one of the given substrings.
 *
 * @param {string} haystack Text to search in.
 * @param {string[]} needles Substrings to look for (matched case-sensitively).
 * @return {boolean} Whether any needle occurs in the haystack.
 */
function filterSpamContainsAny_(haystack, needles) {
  for (var k = 0; k < needles.length; k++) {
    if (haystack.indexOf(needles[k]) !== -1) {
      return true;
    }
  }
  return false;
}

/**
 * Applies the hand-written spam heuristics to a single message.
 *
 * The rules look at the sender, the lower-cased subject and the raw message
 * source (both as-is, for header checks, and lower-cased, for keyword checks).
 *
 * @param {Object} message A message exposing getFrom(), getSubject() and
 *     getRawContent().
 * @return {boolean} True when at least one heuristic matches.
 */
function filterSpamLooksLikeSpam_(message) {
  var from = message.getFrom();
  var subject = message.getSubject().toLowerCase();
  var raw = message.getRawContent();
  var rawLower = raw.toLowerCase();

  // Sender domains that are blocked outright.
  if (filterSpamContainsAny_(from, [
    '.gov.cn', '.ru', '.xyz', 'simplemailwork.com', 'probalsa.com.ec'
  ])) {
    return true;
  }

  // Subject keywords. Every hit is compared against -1 explicitly: indexOf()
  // returns -1 (truthy) for "not found" and 0 (falsy) for a hit at the start,
  // so it must never be used directly as a boolean.
  if (filterSpamContainsAny_(subject, [
    'million ', ' beneficiary', ' nigeria', 'money gram'
  ])) {
    return true;
  }

  // Case-sensitive header checks on the raw source.
  if (filterSpamContainsAny_(raw, [
    'Return-Path: <>',
    'User-Agent: Roundcube Webmail',
    'X-CTCH-Spam:  Bulk',
    'Received-SPF: softfail',
    'Content-Type: application/msword;' // .doc attachments are not allowed
  ])) {
    return true;
  }

  // Case-insensitive keyword checks on the raw source.
  if (filterSpamContainsAny_(rawLower, [
    'lottery', 'inheritance', 'nigeria', 'bank draft',
    ' western union', ' visa ', ' master ', ' atm card'
  ])) {
    return true;
  }

  // An amount word only counts together with a currency word. This rule is
  // evaluated independently, so a message that mentions an amount without a
  // currency is still checked against all the other rules above.
  return filterSpamContainsAny_(rawLower, [' million ', ' thousand ']) &&
      filterSpamContainsAny_(rawLower, [' usd', ' dollar']);
}

/**
 * Walks every thread in the Gmail Spam folder and moves messages that match
 * the spam heuristics to the Trash.
 *
 * The spam flag is kept per thread: once one message in a thread matches,
 * that message and every later message of the same thread are trashed.
 * Messages that precede the first match in a thread are left alone.
 */
function FilterSpam() {
  var threads = GmailApp.getSpamThreads();
  for (var i = 0; i < threads.length; i++) {
    var messages = threads[i].getMessages();
    var spamcnt = 0; // sticky for the remainder of this thread
    for (var m = 0; m < messages.length; m++) {
      // Skip the (comparatively expensive) content checks once the thread
      // has already been flagged.
      if (spamcnt === 0 && filterSpamLooksLikeSpam_(messages[m])) {
        spamcnt++;
      }
      if (spamcnt !== 0) {
        messages[m].moveToTrash();
      }
    }
  }
}


/**
 * Decides whether a trashed message should be permanently deleted.
 *
 * Spammers usually leave the Message-ID header alone, so its domain part is
 * used to recognise throw-away domains. Examples that match:
 *   Message-ID: <3A03C937-BEA9-4589-867F-593CA3104153@qhgkoctono.ml>
 *   Message-ID: <9E2BDE9F-0435-4038-AAE9-4E1B6681DD55@nyonwbeejb.cf>
 *   Message-ID: <vdyfn9e6uh7g7gx@u3afzwjx7iql4gx.local.px>
 *   Message-ID: <D6C74CE7-CFAD-4E1C-9430-3BEDED41D70D@zgyciflzz.gq>
 *
 * @param {Object} message A message exposing getFrom() and getRawContent().
 * @param {RegExp} messageIdPattern Non-global pattern for bad Message-IDs.
 * @return {boolean} True when one of the deletion rules matches.
 */
function removeTrashSpamShouldDelete_(message, messageIdPattern) {
  var from = message.getFrom();
  var raw = message.getRawContent();
  var rawLower = raw.toLowerCase();

  if (messageIdPattern.test(raw)) {
    return true;
  }
  // Fixed base64 fragment seen in one family of generated spam.
  if (raw.indexOf('ImwiOiA2NCwgInMiOiAwLCAidSI6IDI0NzY4ODQyMywgInQiOiAxLCAic2Qi') !== -1) {
    return true;
  }
  if (from.indexOf('ocn.ne.jp') !== -1) {
    return true;
  }
  // Link shorteners written as <http://...> in the message source.
  if (rawLower.indexOf('<http://bit.ly/') !== -1 ||
      rawLower.indexOf('<http://tinyurl.com/') !== -1) {
    return true;
  }
  if (raw.indexOf('If you do not wish to continue </em> receiving email newsletters') !== -1) {
    return true;
  }
  // base64-encoded mail carrying an *.onmicrosoft.com address.
  return raw.indexOf('Content-Transfer-Encoding: base64') !== -1 &&
      raw.indexOf('.onmicrosoft.com>') !== -1;
}

/**
 * Walks every thread in the Gmail Trash and permanently deletes messages that
 * match the spam rules, using the advanced Gmail service
 * (Gmail.Users.Messages.remove), so they disappear immediately.
 *
 * The delete flag is kept per thread: once one message in a thread matches,
 * that message and every later message of the same thread are deleted.
 * NOTE(review): deletion is irreversible; confirm the per-thread stickiness
 * is really wanted for threads that mix spam and non-spam messages.
 */
function RemoveTrashSpam() {
  // Written as a regex literal so that "\." really is a literal dot (inside a
  // string literal the backslash is dropped and "." matches any character).
  // No "g" flag: a global regex remembers lastIndex between calls, which made
  // matches on one message hide matches on the next one.
  // The trailing [0-9.]+ alternative catches Message-IDs ending in an IP
  // address (for example 192.3.x.x).
  var messageIdPattern = /Message-ID: <[a-zA-Z0-9@.-]+(gq|ml|tk|px|cf|ga|jp|it|tr|gr|ec|ar|pe|co|cantv\.net|local|prod\.outlook\.com|onmicrosoft\.com|localdomain|trade|cisco\.com|co\.in|arcamax\.com|alice\.it|gov\.br|[0-9.]+)>/i;

  var threads = GmailApp.getTrashThreads();
  for (var i = 0; i < threads.length; i++) {
    var messages = threads[i].getMessages();
    var delmsg = 0; // sticky for the remainder of this thread
    for (var m = 0; m < messages.length; m++) {
      if (delmsg === 0 && removeTrashSpamShouldDelete_(messages[m], messageIdPattern)) {
        delmsg++;
      }
      if (delmsg > 0) {
        try {
          Gmail.Users.Messages.remove('me', messages[m].getId());
        } catch (e) {
          // Best effort: keep going with the remaining messages, but leave a
          // trace instead of hiding the failure completely.
          console.error('RemoveTrashSpam: could not delete message ' +
              messages[m].getId() + ': ' + e);
        }
      }
    }
  }
}