Friday, June 9, 2017

Remove SPAM from gmail automatically 5

The newest version is the beginning of a simple anti-spam tool. In this latest version I made it simpler to maintain your own list. With some time left, I want to make the whole routine work in such a way that it works on any given folder.
Even so, the folder names of the trash and spam folders are different for each language... I wonder how that will work out.

/**
 * Permanently deletes spam-looking messages that are sitting in the Gmail Trash.
 *
 * Every message of every thread returned by GmailApp.getTrashThreads() is
 * judged on its own: sender, Reply-To, selected header lines and a few URL
 * shortener markers in the raw mail. A message that trips at least one rule
 * is removed through the Gmail advanced service (Gmail.Users.Messages.remove),
 * which must be enabled for the script. An error raised by the removal call
 * is not caught and aborts the run.
 *
 * Takes no parameters and returns nothing.
 */
function RemoveFromTrash(){
  // Header patterns. They are real RegExp objects because String.match()
  // ignores a second "flags" argument, and "\." inside a string literal
  // silently degrades to "any character".
  var reMessageId=/Message-ID: <.+>/i;
  // Spammers usually keep a Message-ID, so its domain shows where the mail
  // was generated. Throw-away TLDs, bare IPs and a few abused hosts are listed.
  var reMessageIdBlocked=/Message-ID: <[a-zA-Z0-9\-@.]+(gq|ml|tk|px|cf|ga|jp|cc|it|id|tr|cl|gr|ec|ar|pe|co|ua|cr|hk|bn|za|in|br|mx|cn|life|atlis1|biz|glocal\.net|gov\.my|local|att\.net|prod\.outlook\.com|onmicrosoft\.com|localdomain|trade|cisco\.com|co\.in|arcamax\.com|alice\.it|co\.nz|gov\.br|telkom\.net|vevida\.net|global\.net|[0-9.]+)>/i;
  var reReceivedFrom=/Received: from .+ /i;
  var reReturnPath=/Return-Path: <.+>/i;
  // The header and the body of a raw mail are separated by an empty line.
  var CRLF=String.fromCharCode(13)+String.fromCharCode(10);

  // Substrings looked for in the From value. Each ends with ">" so that it
  // only matches the end of the address ("za>" used to be "za", which hit
  // every sender whose name or address merely contained those two letters).
  var spam_from=['gov.cn>','.ru>','.xyz>','.cf>','za>','domainandmarket.com>','probalsa.com.ec>','.men>','simplemailwork.com>','yahoo.co.jp>','.edu>','edu.tw>','edu.pl>'];
  spam_from=spam_from.concat(['cesterinevitably.com>','permanentbrain.com>','pro>']);
  var spam_replyto=['permanentbrain.com>'];
  // Substrings looked for in the Message-ID header line.
  var spam_messageid=['gov.cn>','.ru>','.xyz>','.cf>','za>','domainandmarket.com>','probalsa.com.ec>','.men>','simplemailwork.com>','edu.tw>','edu.pl>','cisco.com>'];
  spam_messageid=spam_messageid.concat(['.gq>','.ml>','.tk>','.px>','.cf>','.ga>','.jp>','.cf>','.cc>','.it>','.id>','.tr>','.gr>','.ec>','.ar>','.pe>','.co>','.ua>','.cr>','.hk>','.bn>','.za>']);
  spam_messageid=spam_messageid.concat(['in>','br>','id>','it>','mx>','cn>','life>','atlis1>','biz>','glocal.net>','gov.my>','local>','att.net>','prod.outlook.com>','onmicrosoft.com>','localdomain>']);
  spam_messageid=spam_messageid.concat(['trade>','cisco.com>','co.in>','arcamax.com>','alice.it>','co.nz>','gov>','.br>','telkom.net>','vevida.net>','global.net>','SMTPIN_ADDED_MISSING@mx.google.com>']);
  spam_messageid=spam_messageid.concat(['prod.outlook.com>','betemail.com>','cisco.com>']);

  // True when text contains at least one of the given substrings.
  function containsAny(text,needles){
    for (var k=0;k<needles.length;k++){
      if (text.indexOf(needles[k])>=0){
        return true;
      }
    }
    return false;
  }

  // Text of the first match of re in text, or "" when there is none.
  function firstMatch(text,re){
    var found=text.match(re);
    return found===null ? "" : found[0];
  }

  // Decides for ONE message whether any spam rule applies.
  function looksLikeSpam(message){
    var mail_from=String(message.getFrom()||"");
    var mail_replyto=String(message.getReplyTo()||"");

    // Cheap checks first; the raw mail is only fetched when they do not decide.
    if (containsAny(mail_from,spam_from)){
      return true;
    }
    if (containsAny(mail_replyto,spam_replyto) || mail_replyto.indexOf('permanentbrain.com')!=-1){
      return true;
    }

    var mail_rawcontent=String(message.getRawContent()||"");
    var header_end=mail_rawcontent.indexOf(CRLF+CRLF);
    // Without an empty line the mail consists of headers only.
    var mail_header=header_end>=0 ? mail_rawcontent.substring(0,header_end) : mail_rawcontent;

    // Covers the SMTPIN_ADDED_MISSING@mx.google.com IDs as well (last list entries).
    if (containsAny(firstMatch(mail_header,reMessageId),spam_messageid)){
      return true;
    }
    if (reMessageIdBlocked.test(mail_header)){
      return true;
    }
    // Only the first "Received: from" line is inspected.
    // NOTE(review): '.cc' and '.at' are plain substring tests and also hit
    // host names such as "mail.ccexample.com" - tighten if that is too broad.
    if (firstMatch(mail_header,reReceivedFrom).indexOf('.cc')!=-1){
      return true;
    }
    if (firstMatch(mail_header,reReturnPath).indexOf('.at')!=-1){
      return true;
    }

    var mail_rawcontent_lowercase=mail_rawcontent.toLowerCase();
    if (mail_rawcontent_lowercase.indexOf('<http://bit.ly/')!=-1){
      return true;
    }
    if (mail_rawcontent_lowercase.indexOf('<http://tinyurl.com/')!=-1){
      return true;
    }
    return mail_rawcontent.indexOf('.onmicrosoft.com>')!=-1;
  }

  var threads=GmailApp.getTrashThreads();
  for (var i=0;i<threads.length;i++){
    var messages=threads[i].getMessages();
    for (var m=0;m<messages.length;m++){
      // Each message is judged separately; a hit does not condemn the
      // messages that follow it in the same thread.
      if (looksLikeSpam(messages[m])){
        Gmail.Users.Messages.remove('me', messages[m].getId());
      }
    }
  }
}


/**
 * Moves spam-looking messages from the Gmail Spam folder to the Trash.
 *
 * Every message of every thread returned by GmailApp.getSpamThreads() is
 * judged on its own: sender, Reply-To, subject keywords, a few raw header
 * markers and typical scam wording in the raw mail. A message that trips at
 * least one rule is moved with GmailMessage.moveToTrash().
 *
 * Takes no parameters and returns nothing.
 */
function MoveFromSpamToTrash() {
  // The rule lists never change during a run, so they are built once.
  // Substrings looked for in the From value; each ends with ">" so that it
  // only matches the end of the address ("za>" used to be "za", which hit
  // every sender whose name or address merely contained those two letters).
  var spam_from=['gov.cn>','.ru>','.xyz>','.cf>','za>','domainandmarket.com>','probalsa.com.ec>','.men>','simplemailwork.com>','yahoo.co.jp>','.edu>','edu.tw>','edu.pl>'];
  spam_from=spam_from.concat(['permanentbrain.com>','cisco.com>']);
  var spam_replyto=['permanentbrain.com>'];
  // Looked for in the lower-cased subject.
  var spam_subject=['million ',' beneficiary',' nigeria','money gram'];
  // Looked for in the raw mail exactly as written (case-sensitive).
  // The msword entry rejects mails that carry .doc attachments.
  var spam_raw=['Return-Path: <>','User-Agent: Roundcube Webmail','X-CTCH-Spam:  Bulk','Received-SPF: softfail','Content-Type: application/msword;'];
  // Looked for in the lower-cased raw mail.
  var spam_raw_lowercase=['lottery','inheritance','nigeria','bank draft',' western union',' visa ',' master ',' atm card'];
  // An amount word only counts together with a currency word.
  var spam_amount=[' million ',' thousand '];
  var spam_currency=[' usd',' dollar'];

  // True when text contains at least one of the given substrings.
  function containsAny(text,needles){
    for (var k=0;k<needles.length;k++){
      if (text.indexOf(needles[k])>=0){
        return true;
      }
    }
    return false;
  }

  // Decides for ONE message whether any spam rule applies.
  function looksLikeSpam(message){
    // Cheap checks first; the raw mail is only fetched when they do not decide.
    if (containsAny(String(message.getFrom()||""),spam_from)){
      return true;
    }
    if (containsAny(String(message.getReplyTo()||""),spam_replyto)){
      return true;
    }
    if (containsAny(String(message.getSubject()||"").toLowerCase(),spam_subject)){
      return true;
    }

    var mail_rawcontent=String(message.getRawContent()||"");
    if (containsAny(mail_rawcontent,spam_raw)){
      return true;
    }
    var mail_rawcontent_lowercase=mail_rawcontent.toLowerCase();
    if (containsAny(mail_rawcontent_lowercase,spam_raw_lowercase)){
      return true;
    }
    // Evaluated independently of the rules above, so an amount without a
    // currency no longer hides the Western Union / card rules.
    return containsAny(mail_rawcontent_lowercase,spam_amount) && containsAny(mail_rawcontent_lowercase,spam_currency);
  }

  var threads=GmailApp.getSpamThreads();
  for (var i=0;i<threads.length;i++){
    var messages=threads[i].getMessages();
    for (var m=0;m<messages.length;m++){
      // Each message is judged separately; a hit does not condemn the
      // messages that follow it in the same thread.
      if (looksLikeSpam(messages[m])){
        messages[m].moveToTrash();
      }
    }
  }
}